Diffstat (limited to 'contrib/llvm/lib/Target/SystemZ')
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp  1298
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp  482
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp  234
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h  78
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp  129
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp  29
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h  26
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp  307
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h  32
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp  167
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp  253
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h  109
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/README.txt  159
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZ.h  185
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZ.td  78
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp  527
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h  42
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp  21
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.h  130
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td  122
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp  52
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h  58
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp  582
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZExpandPseudo.cpp  153
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZFeatures.td  213
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp  562
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h  64
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp  337
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h  128
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp  1420
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp  6397
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h  592
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h  46
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZInstrDFP.td  231
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td  521
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td  4545
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZInstrHFP.td  240
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp  1807
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h  326
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td  2117
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZInstrSystem.td  517
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td  1204
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp  146
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp  472
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp  103
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.h  44
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp  17
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h  79
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp  153
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h  112
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZOperands.td  595
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZOperators.td  683
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td  169
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZProcessors.td  35
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp  159
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h  67
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td  316
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZSchedule.td  81
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td  1503
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td  1196
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td  1241
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp  275
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h  74
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp  285
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp  68
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h  179
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZTDC.cpp  382
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp  222
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h  58
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp  888
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h  91
-rw-r--r--  contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp  23
72 files changed, 36236 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
new file mode 100644
index 000000000000..ee23692ad1db
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -0,0 +1,1298 @@
+//===-- SystemZAsmParser.cpp - Parse SystemZ assembly instructions --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCParser/MCTargetAsmParser.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/TargetRegistry.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <memory>
+#include <string>
+
+using namespace llvm;
+
+// Return true if Expr is in the range [MinValue, MaxValue].
+static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue) {
+ if (auto *CE = dyn_cast<MCConstantExpr>(Expr)) {
+ int64_t Value = CE->getValue();
+ return Value >= MinValue && Value <= MaxValue;
+ }
+ return false;
+}
+
+namespace {
+
+enum RegisterKind {
+ GR32Reg,
+ GRH32Reg,
+ GR64Reg,
+ GR128Reg,
+ ADDR32Reg,
+ ADDR64Reg,
+ FP32Reg,
+ FP64Reg,
+ FP128Reg,
+ VR32Reg,
+ VR64Reg,
+ VR128Reg,
+ AR32Reg,
+ CR64Reg,
+};
+
+enum MemoryKind {
+ BDMem,
+ BDXMem,
+ BDLMem,
+ BDRMem,
+ BDVMem
+};
+
+class SystemZOperand : public MCParsedAsmOperand {
+private:
+ enum OperandKind {
+ KindInvalid,
+ KindToken,
+ KindReg,
+ KindImm,
+ KindImmTLS,
+ KindMem
+ };
+
+ OperandKind Kind;
+ SMLoc StartLoc, EndLoc;
+
+ // A string of length Length, starting at Data.
+ struct TokenOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ // LLVM register Num, which has kind Kind. In some ways it might be
+ // easier for this class to have a register bank (general, floating-point
+ // or access) and a raw register number (0-15). This would postpone the
+ // interpretation of the operand to the add*() methods and avoid the need
+ // for context-dependent parsing. However, we do things the current way
+ // because of the virtual getReg() method, which needs to distinguish
+ // between (say) %r0 used as a single register and %r0 used as a pair.
+ // Context-dependent parsing can also give us slightly better error
+ // messages when invalid pairs like %r1 are used.
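+ // (On SystemZ, 128-bit register pairs must start at an even-numbered
+ // GPR, which is why %r1 cannot name a pair.)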
+ struct RegOp {
+ RegisterKind Kind;
+ unsigned Num;
+ };
+
+ // Base + Disp + Index, where Base and Index are LLVM registers or 0.
+ // MemKind says what type of memory this is and RegKind says what type
+ // the base register has (ADDR32Reg or ADDR64Reg). Length is the operand
+ // length for D(L,B)-style operands, otherwise it is null.
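+ // For example, in assembler syntax the D(X,B) operand 100(%r2,%r3) has
+ // Disp 100, Index %r2 and Base %r3, while the D(L,B) operand 0(8,%r4)
+ // has Disp 0, an immediate Length of 8 and Base %r4.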
+ struct MemOp {
+ unsigned Base : 12;
+ unsigned Index : 12;
+ unsigned MemKind : 4;
+ unsigned RegKind : 4;
+ const MCExpr *Disp;
+ union {
+ const MCExpr *Imm;
+ unsigned Reg;
+ } Length;
+ };
+
+ // Imm is an immediate operand, and Sym is an optional TLS symbol
+ // for use with a __tls_get_offset marker relocation.
+ struct ImmTLSOp {
+ const MCExpr *Imm;
+ const MCExpr *Sym;
+ };
+
+ union {
+ TokenOp Token;
+ RegOp Reg;
+ const MCExpr *Imm;
+ ImmTLSOp ImmTLS;
+ MemOp Mem;
+ };
+
+ void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+ // Add as immediates when possible. Null MCExpr = 0.
+ if (!Expr)
+ Inst.addOperand(MCOperand::createImm(0));
+ else if (auto *CE = dyn_cast<MCConstantExpr>(Expr))
+ Inst.addOperand(MCOperand::createImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::createExpr(Expr));
+ }
+
+public:
+ SystemZOperand(OperandKind kind, SMLoc startLoc, SMLoc endLoc)
+ : Kind(kind), StartLoc(startLoc), EndLoc(endLoc) {}
+
+ // Create particular kinds of operand.
+ static std::unique_ptr<SystemZOperand> createInvalid(SMLoc StartLoc,
+ SMLoc EndLoc) {
+ return make_unique<SystemZOperand>(KindInvalid, StartLoc, EndLoc);
+ }
+
+ static std::unique_ptr<SystemZOperand> createToken(StringRef Str, SMLoc Loc) {
+ auto Op = make_unique<SystemZOperand>(KindToken, Loc, Loc);
+ Op->Token.Data = Str.data();
+ Op->Token.Length = Str.size();
+ return Op;
+ }
+
+ static std::unique_ptr<SystemZOperand>
+ createReg(RegisterKind Kind, unsigned Num, SMLoc StartLoc, SMLoc EndLoc) {
+ auto Op = make_unique<SystemZOperand>(KindReg, StartLoc, EndLoc);
+ Op->Reg.Kind = Kind;
+ Op->Reg.Num = Num;
+ return Op;
+ }
+
+ static std::unique_ptr<SystemZOperand>
+ createImm(const MCExpr *Expr, SMLoc StartLoc, SMLoc EndLoc) {
+ auto Op = make_unique<SystemZOperand>(KindImm, StartLoc, EndLoc);
+ Op->Imm = Expr;
+ return Op;
+ }
+
+ static std::unique_ptr<SystemZOperand>
+ createMem(MemoryKind MemKind, RegisterKind RegKind, unsigned Base,
+ const MCExpr *Disp, unsigned Index, const MCExpr *LengthImm,
+ unsigned LengthReg, SMLoc StartLoc, SMLoc EndLoc) {
+ auto Op = make_unique<SystemZOperand>(KindMem, StartLoc, EndLoc);
+ Op->Mem.MemKind = MemKind;
+ Op->Mem.RegKind = RegKind;
+ Op->Mem.Base = Base;
+ Op->Mem.Index = Index;
+ Op->Mem.Disp = Disp;
+ if (MemKind == BDLMem)
+ Op->Mem.Length.Imm = LengthImm;
+ if (MemKind == BDRMem)
+ Op->Mem.Length.Reg = LengthReg;
+ return Op;
+ }
+
+ static std::unique_ptr<SystemZOperand>
+ createImmTLS(const MCExpr *Imm, const MCExpr *Sym,
+ SMLoc StartLoc, SMLoc EndLoc) {
+ auto Op = make_unique<SystemZOperand>(KindImmTLS, StartLoc, EndLoc);
+ Op->ImmTLS.Imm = Imm;
+ Op->ImmTLS.Sym = Sym;
+ return Op;
+ }
+
+ // Token operands
+ bool isToken() const override {
+ return Kind == KindToken;
+ }
+ StringRef getToken() const {
+ assert(Kind == KindToken && "Not a token");
+ return StringRef(Token.Data, Token.Length);
+ }
+
+ // Register operands.
+ bool isReg() const override {
+ return Kind == KindReg;
+ }
+ bool isReg(RegisterKind RegKind) const {
+ return Kind == KindReg && Reg.Kind == RegKind;
+ }
+ unsigned getReg() const override {
+ assert(Kind == KindReg && "Not a register");
+ return Reg.Num;
+ }
+
+ // Immediate operands.
+ bool isImm() const override {
+ return Kind == KindImm;
+ }
+ bool isImm(int64_t MinValue, int64_t MaxValue) const {
+ return Kind == KindImm && inRange(Imm, MinValue, MaxValue);
+ }
+ const MCExpr *getImm() const {
+ assert(Kind == KindImm && "Not an immediate");
+ return Imm;
+ }
+
+ // Immediate operands with optional TLS symbol.
+ bool isImmTLS() const {
+ return Kind == KindImmTLS;
+ }
+
+ // Memory operands.
+ bool isMem() const override {
+ return Kind == KindMem;
+ }
+ bool isMem(MemoryKind MemKind) const {
+ return (Kind == KindMem &&
+ (Mem.MemKind == MemKind ||
+ // A BDMem can be treated as a BDXMem in which the index
+ // register field is 0.
+ (Mem.MemKind == BDMem && MemKind == BDXMem)));
+ }
+ bool isMem(MemoryKind MemKind, RegisterKind RegKind) const {
+ return isMem(MemKind) && Mem.RegKind == RegKind;
+ }
+ bool isMemDisp12(MemoryKind MemKind, RegisterKind RegKind) const {
+ return isMem(MemKind, RegKind) && inRange(Mem.Disp, 0, 0xfff);
+ }
+ bool isMemDisp20(MemoryKind MemKind, RegisterKind RegKind) const {
+ return isMem(MemKind, RegKind) && inRange(Mem.Disp, -524288, 524287);
+ }
+ bool isMemDisp12Len4(RegisterKind RegKind) const {
+ return isMemDisp12(BDLMem, RegKind) && inRange(Mem.Length.Imm, 1, 0x10);
+ }
+ bool isMemDisp12Len8(RegisterKind RegKind) const {
+ return isMemDisp12(BDLMem, RegKind) && inRange(Mem.Length.Imm, 1, 0x100);
+ }
+
+ // Override MCParsedAsmOperand.
+ SMLoc getStartLoc() const override { return StartLoc; }
+ SMLoc getEndLoc() const override { return EndLoc; }
+ void print(raw_ostream &OS) const override;
+
+ // Used by the TableGen code to add particular types of operand
+ // to an instruction.
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands");
+ Inst.addOperand(MCOperand::createReg(getReg()));
+ }
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands");
+ addExpr(Inst, getImm());
+ }
+ void addBDAddrOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands");
+ assert(isMem(BDMem) && "Invalid operand type");
+ Inst.addOperand(MCOperand::createReg(Mem.Base));
+ addExpr(Inst, Mem.Disp);
+ }
+ void addBDXAddrOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands");
+ assert(isMem(BDXMem) && "Invalid operand type");
+ Inst.addOperand(MCOperand::createReg(Mem.Base));
+ addExpr(Inst, Mem.Disp);
+ Inst.addOperand(MCOperand::createReg(Mem.Index));
+ }
+ void addBDLAddrOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands");
+ assert(isMem(BDLMem) && "Invalid operand type");
+ Inst.addOperand(MCOperand::createReg(Mem.Base));
+ addExpr(Inst, Mem.Disp);
+ addExpr(Inst, Mem.Length.Imm);
+ }
+ void addBDRAddrOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands");
+ assert(isMem(BDRMem) && "Invalid operand type");
+ Inst.addOperand(MCOperand::createReg(Mem.Base));
+ addExpr(Inst, Mem.Disp);
+ Inst.addOperand(MCOperand::createReg(Mem.Length.Reg));
+ }
+ void addBDVAddrOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands");
+ assert(isMem(BDVMem) && "Invalid operand type");
+ Inst.addOperand(MCOperand::createReg(Mem.Base));
+ addExpr(Inst, Mem.Disp);
+ Inst.addOperand(MCOperand::createReg(Mem.Index));
+ }
+ void addImmTLSOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands");
+ assert(Kind == KindImmTLS && "Invalid operand type");
+ addExpr(Inst, ImmTLS.Imm);
+ if (ImmTLS.Sym)
+ addExpr(Inst, ImmTLS.Sym);
+ }
+
+ // Used by the TableGen code to check for particular operand types.
+ bool isGR32() const { return isReg(GR32Reg); }
+ bool isGRH32() const { return isReg(GRH32Reg); }
+ bool isGRX32() const { return false; }
+ bool isGR64() const { return isReg(GR64Reg); }
+ bool isGR128() const { return isReg(GR128Reg); }
+ bool isADDR32() const { return isReg(ADDR32Reg); }
+ bool isADDR64() const { return isReg(ADDR64Reg); }
+ bool isADDR128() const { return false; }
+ bool isFP32() const { return isReg(FP32Reg); }
+ bool isFP64() const { return isReg(FP64Reg); }
+ bool isFP128() const { return isReg(FP128Reg); }
+ bool isVR32() const { return isReg(VR32Reg); }
+ bool isVR64() const { return isReg(VR64Reg); }
+ bool isVF128() const { return false; }
+ bool isVR128() const { return isReg(VR128Reg); }
+ bool isAR32() const { return isReg(AR32Reg); }
+ bool isCR64() const { return isReg(CR64Reg); }
+ bool isAnyReg() const { return (isReg() || isImm(0, 15)); }
+ bool isBDAddr32Disp12() const { return isMemDisp12(BDMem, ADDR32Reg); }
+ bool isBDAddr32Disp20() const { return isMemDisp20(BDMem, ADDR32Reg); }
+ bool isBDAddr64Disp12() const { return isMemDisp12(BDMem, ADDR64Reg); }
+ bool isBDAddr64Disp20() const { return isMemDisp20(BDMem, ADDR64Reg); }
+ bool isBDXAddr64Disp12() const { return isMemDisp12(BDXMem, ADDR64Reg); }
+ bool isBDXAddr64Disp20() const { return isMemDisp20(BDXMem, ADDR64Reg); }
+ bool isBDLAddr64Disp12Len4() const { return isMemDisp12Len4(ADDR64Reg); }
+ bool isBDLAddr64Disp12Len8() const { return isMemDisp12Len8(ADDR64Reg); }
+ bool isBDRAddr64Disp12() const { return isMemDisp12(BDRMem, ADDR64Reg); }
+ bool isBDVAddr64Disp12() const { return isMemDisp12(BDVMem, ADDR64Reg); }
+ bool isU1Imm() const { return isImm(0, 1); }
+ bool isU2Imm() const { return isImm(0, 3); }
+ bool isU3Imm() const { return isImm(0, 7); }
+ bool isU4Imm() const { return isImm(0, 15); }
+ bool isU6Imm() const { return isImm(0, 63); }
+ bool isU8Imm() const { return isImm(0, 255); }
+ bool isS8Imm() const { return isImm(-128, 127); }
+ bool isU12Imm() const { return isImm(0, 4095); }
+ bool isU16Imm() const { return isImm(0, 65535); }
+ bool isS16Imm() const { return isImm(-32768, 32767); }
+ bool isU32Imm() const { return isImm(0, (1LL << 32) - 1); }
+ bool isS32Imm() const { return isImm(-(1LL << 31), (1LL << 31) - 1); }
+ bool isU48Imm() const { return isImm(0, (1LL << 48) - 1); }
+};
+
+class SystemZAsmParser : public MCTargetAsmParser {
+#define GET_ASSEMBLER_HEADER
+#include "SystemZGenAsmMatcher.inc"
+
+private:
+ MCAsmParser &Parser;
+ enum RegisterGroup {
+ RegGR,
+ RegFP,
+ RegV,
+ RegAR,
+ RegCR
+ };
+ struct Register {
+ RegisterGroup Group;
+ unsigned Num;
+ SMLoc StartLoc, EndLoc;
+ };
+
+ bool parseRegister(Register &Reg);
+
+ bool parseRegister(Register &Reg, RegisterGroup Group, const unsigned *Regs,
+ bool IsAddress = false);
+
+ OperandMatchResultTy parseRegister(OperandVector &Operands,
+ RegisterGroup Group, const unsigned *Regs,
+ RegisterKind Kind);
+
+ OperandMatchResultTy parseAnyRegister(OperandVector &Operands);
+
+ bool parseAddress(bool &HaveReg1, Register &Reg1,
+ bool &HaveReg2, Register &Reg2,
+ const MCExpr *&Disp, const MCExpr *&Length);
+ bool parseAddressRegister(Register &Reg);
+
+ bool ParseDirectiveInsn(SMLoc L);
+
+ OperandMatchResultTy parseAddress(OperandVector &Operands,
+ MemoryKind MemKind, const unsigned *Regs,
+ RegisterKind RegKind);
+
+ OperandMatchResultTy parsePCRel(OperandVector &Operands, int64_t MinVal,
+ int64_t MaxVal, bool AllowTLS);
+
+ bool parseOperand(OperandVector &Operands, StringRef Mnemonic);
+
+public:
+ SystemZAsmParser(const MCSubtargetInfo &sti, MCAsmParser &parser,
+ const MCInstrInfo &MII,
+ const MCTargetOptions &Options)
+ : MCTargetAsmParser(Options, sti), Parser(parser) {
+ MCAsmParserExtension::Initialize(Parser);
+
+ // Alias the .word directive to .short.
+ parser.addAliasForDirective(".word", ".short");
+
+ // Initialize the set of available features.
+ setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
+ }
+
+ // Override MCTargetAsmParser.
+ bool ParseDirective(AsmToken DirectiveID) override;
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) override;
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands, MCStreamer &Out,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm) override;
+
+ // Used by the TableGen code to parse particular operand types.
+ OperandMatchResultTy parseGR32(OperandVector &Operands) {
+ return parseRegister(Operands, RegGR, SystemZMC::GR32Regs, GR32Reg);
+ }
+ OperandMatchResultTy parseGRH32(OperandVector &Operands) {
+ return parseRegister(Operands, RegGR, SystemZMC::GRH32Regs, GRH32Reg);
+ }
+ OperandMatchResultTy parseGRX32(OperandVector &Operands) {
+ llvm_unreachable("GRX32 should only be used for pseudo instructions");
+ }
+ OperandMatchResultTy parseGR64(OperandVector &Operands) {
+ return parseRegister(Operands, RegGR, SystemZMC::GR64Regs, GR64Reg);
+ }
+ OperandMatchResultTy parseGR128(OperandVector &Operands) {
+ return parseRegister(Operands, RegGR, SystemZMC::GR128Regs, GR128Reg);
+ }
+ OperandMatchResultTy parseADDR32(OperandVector &Operands) {
+ return parseRegister(Operands, RegGR, SystemZMC::GR32Regs, ADDR32Reg);
+ }
+ OperandMatchResultTy parseADDR64(OperandVector &Operands) {
+ return parseRegister(Operands, RegGR, SystemZMC::GR64Regs, ADDR64Reg);
+ }
+ OperandMatchResultTy parseADDR128(OperandVector &Operands) {
+ llvm_unreachable("Shouldn't be used as an operand");
+ }
+ OperandMatchResultTy parseFP32(OperandVector &Operands) {
+ return parseRegister(Operands, RegFP, SystemZMC::FP32Regs, FP32Reg);
+ }
+ OperandMatchResultTy parseFP64(OperandVector &Operands) {
+ return parseRegister(Operands, RegFP, SystemZMC::FP64Regs, FP64Reg);
+ }
+ OperandMatchResultTy parseFP128(OperandVector &Operands) {
+ return parseRegister(Operands, RegFP, SystemZMC::FP128Regs, FP128Reg);
+ }
+ OperandMatchResultTy parseVR32(OperandVector &Operands) {
+ return parseRegister(Operands, RegV, SystemZMC::VR32Regs, VR32Reg);
+ }
+ OperandMatchResultTy parseVR64(OperandVector &Operands) {
+ return parseRegister(Operands, RegV, SystemZMC::VR64Regs, VR64Reg);
+ }
+ OperandMatchResultTy parseVF128(OperandVector &Operands) {
+ llvm_unreachable("Shouldn't be used as an operand");
+ }
+ OperandMatchResultTy parseVR128(OperandVector &Operands) {
+ return parseRegister(Operands, RegV, SystemZMC::VR128Regs, VR128Reg);
+ }
+ OperandMatchResultTy parseAR32(OperandVector &Operands) {
+ return parseRegister(Operands, RegAR, SystemZMC::AR32Regs, AR32Reg);
+ }
+ OperandMatchResultTy parseCR64(OperandVector &Operands) {
+ return parseRegister(Operands, RegCR, SystemZMC::CR64Regs, CR64Reg);
+ }
+ OperandMatchResultTy parseAnyReg(OperandVector &Operands) {
+ return parseAnyRegister(Operands);
+ }
+ OperandMatchResultTy parseBDAddr32(OperandVector &Operands) {
+ return parseAddress(Operands, BDMem, SystemZMC::GR32Regs, ADDR32Reg);
+ }
+ OperandMatchResultTy parseBDAddr64(OperandVector &Operands) {
+ return parseAddress(Operands, BDMem, SystemZMC::GR64Regs, ADDR64Reg);
+ }
+ OperandMatchResultTy parseBDXAddr64(OperandVector &Operands) {
+ return parseAddress(Operands, BDXMem, SystemZMC::GR64Regs, ADDR64Reg);
+ }
+ OperandMatchResultTy parseBDLAddr64(OperandVector &Operands) {
+ return parseAddress(Operands, BDLMem, SystemZMC::GR64Regs, ADDR64Reg);
+ }
+ OperandMatchResultTy parseBDRAddr64(OperandVector &Operands) {
+ return parseAddress(Operands, BDRMem, SystemZMC::GR64Regs, ADDR64Reg);
+ }
+ OperandMatchResultTy parseBDVAddr64(OperandVector &Operands) {
+ return parseAddress(Operands, BDVMem, SystemZMC::GR64Regs, ADDR64Reg);
+ }
+ OperandMatchResultTy parsePCRel12(OperandVector &Operands) {
+ return parsePCRel(Operands, -(1LL << 12), (1LL << 12) - 1, false);
+ }
+ OperandMatchResultTy parsePCRel16(OperandVector &Operands) {
+ return parsePCRel(Operands, -(1LL << 16), (1LL << 16) - 1, false);
+ }
+ OperandMatchResultTy parsePCRel24(OperandVector &Operands) {
+ return parsePCRel(Operands, -(1LL << 24), (1LL << 24) - 1, false);
+ }
+ OperandMatchResultTy parsePCRel32(OperandVector &Operands) {
+ return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1, false);
+ }
+ OperandMatchResultTy parsePCRelTLS16(OperandVector &Operands) {
+ return parsePCRel(Operands, -(1LL << 16), (1LL << 16) - 1, true);
+ }
+ OperandMatchResultTy parsePCRelTLS32(OperandVector &Operands) {
+ return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1, true);
+ }
+};
+
+} // end anonymous namespace
+
+#define GET_REGISTER_MATCHER
+#define GET_SUBTARGET_FEATURE_NAME
+#define GET_MATCHER_IMPLEMENTATION
+#include "SystemZGenAsmMatcher.inc"
+
+// Used for the .insn directives; contains information needed to parse the
+// operands in the directive.
+struct InsnMatchEntry {
+ StringRef Format;
+ uint64_t Opcode;
+ int32_t NumOperands;
+ MatchClassKind OperandKinds[5];
+};
+
+// For equal_range comparison.
+struct CompareInsn {
+ bool operator() (const InsnMatchEntry &LHS, StringRef RHS) {
+ return LHS.Format < RHS;
+ }
+ bool operator() (StringRef LHS, const InsnMatchEntry &RHS) {
+ return LHS < RHS.Format;
+ }
+ bool operator() (const InsnMatchEntry &LHS, const InsnMatchEntry &RHS) {
+ return LHS.Format < RHS.Format;
+ }
+};
+
+// Table initializing information for parsing the .insn directive.
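+// For example, ".insn rr,0x1800,%r1,%r2" uses the "rr" entry below: the
+// 16-bit encoding value supplies the opcode (here 0x18, i.e. LR) and the
+// two registers fill in the R1 and R2 fields.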
+static struct InsnMatchEntry InsnMatchTable[] = {
+ /* Format, Opcode, NumOperands, OperandKinds */
+ { "e", SystemZ::InsnE, 1,
+ { MCK_U16Imm } },
+ { "ri", SystemZ::InsnRI, 3,
+ { MCK_U32Imm, MCK_AnyReg, MCK_S16Imm } },
+ { "rie", SystemZ::InsnRIE, 4,
+ { MCK_U48Imm, MCK_AnyReg, MCK_AnyReg, MCK_PCRel16 } },
+ { "ril", SystemZ::InsnRIL, 3,
+ { MCK_U48Imm, MCK_AnyReg, MCK_PCRel32 } },
+ { "rilu", SystemZ::InsnRILU, 3,
+ { MCK_U48Imm, MCK_AnyReg, MCK_U32Imm } },
+ { "ris", SystemZ::InsnRIS, 5,
+ { MCK_U48Imm, MCK_AnyReg, MCK_S8Imm, MCK_U4Imm, MCK_BDAddr64Disp12 } },
+ { "rr", SystemZ::InsnRR, 3,
+ { MCK_U16Imm, MCK_AnyReg, MCK_AnyReg } },
+ { "rre", SystemZ::InsnRRE, 3,
+ { MCK_U32Imm, MCK_AnyReg, MCK_AnyReg } },
+ { "rrf", SystemZ::InsnRRF, 5,
+ { MCK_U32Imm, MCK_AnyReg, MCK_AnyReg, MCK_AnyReg, MCK_U4Imm } },
+ { "rrs", SystemZ::InsnRRS, 5,
+ { MCK_U48Imm, MCK_AnyReg, MCK_AnyReg, MCK_U4Imm, MCK_BDAddr64Disp12 } },
+ { "rs", SystemZ::InsnRS, 4,
+ { MCK_U32Imm, MCK_AnyReg, MCK_AnyReg, MCK_BDAddr64Disp12 } },
+ { "rse", SystemZ::InsnRSE, 4,
+ { MCK_U48Imm, MCK_AnyReg, MCK_AnyReg, MCK_BDAddr64Disp12 } },
+ { "rsi", SystemZ::InsnRSI, 4,
+ { MCK_U48Imm, MCK_AnyReg, MCK_AnyReg, MCK_PCRel16 } },
+ { "rsy", SystemZ::InsnRSY, 4,
+ { MCK_U48Imm, MCK_AnyReg, MCK_AnyReg, MCK_BDAddr64Disp20 } },
+ { "rx", SystemZ::InsnRX, 3,
+ { MCK_U32Imm, MCK_AnyReg, MCK_BDXAddr64Disp12 } },
+ { "rxe", SystemZ::InsnRXE, 3,
+ { MCK_U48Imm, MCK_AnyReg, MCK_BDXAddr64Disp12 } },
+ { "rxf", SystemZ::InsnRXF, 4,
+ { MCK_U48Imm, MCK_AnyReg, MCK_AnyReg, MCK_BDXAddr64Disp12 } },
+ { "rxy", SystemZ::InsnRXY, 3,
+ { MCK_U48Imm, MCK_AnyReg, MCK_BDXAddr64Disp20 } },
+ { "s", SystemZ::InsnS, 2,
+ { MCK_U32Imm, MCK_BDAddr64Disp12 } },
+ { "si", SystemZ::InsnSI, 3,
+ { MCK_U32Imm, MCK_BDAddr64Disp12, MCK_S8Imm } },
+ { "sil", SystemZ::InsnSIL, 3,
+ { MCK_U48Imm, MCK_BDAddr64Disp12, MCK_U16Imm } },
+ { "siy", SystemZ::InsnSIY, 3,
+ { MCK_U48Imm, MCK_BDAddr64Disp20, MCK_U8Imm } },
+ { "ss", SystemZ::InsnSS, 4,
+ { MCK_U48Imm, MCK_BDXAddr64Disp12, MCK_BDAddr64Disp12, MCK_AnyReg } },
+ { "sse", SystemZ::InsnSSE, 3,
+ { MCK_U48Imm, MCK_BDAddr64Disp12, MCK_BDAddr64Disp12 } },
+ { "ssf", SystemZ::InsnSSF, 4,
+ { MCK_U48Imm, MCK_BDAddr64Disp12, MCK_BDAddr64Disp12, MCK_AnyReg } }
+};
+
+void SystemZOperand::print(raw_ostream &OS) const {
+ llvm_unreachable("Not implemented");
+}
+
+// Parse one register of the form %<prefix><number>.
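+// For example %r15 (general), %f0 (floating-point), %v31 (vector),
+// %a1 (access) and %c0 (control).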
+bool SystemZAsmParser::parseRegister(Register &Reg) {
+ Reg.StartLoc = Parser.getTok().getLoc();
+
+ // Eat the % prefix.
+ if (Parser.getTok().isNot(AsmToken::Percent))
+ return Error(Parser.getTok().getLoc(), "register expected");
+ Parser.Lex();
+
+ // Expect a register name.
+ if (Parser.getTok().isNot(AsmToken::Identifier))
+ return Error(Reg.StartLoc, "invalid register");
+
+ // Check that there's a prefix.
+ StringRef Name = Parser.getTok().getString();
+ if (Name.size() < 2)
+ return Error(Reg.StartLoc, "invalid register");
+ char Prefix = Name[0];
+
+ // Treat the rest of the register name as a register number.
+ if (Name.substr(1).getAsInteger(10, Reg.Num))
+ return Error(Reg.StartLoc, "invalid register");
+
+ // Look for valid combinations of prefix and number.
+ if (Prefix == 'r' && Reg.Num < 16)
+ Reg.Group = RegGR;
+ else if (Prefix == 'f' && Reg.Num < 16)
+ Reg.Group = RegFP;
+ else if (Prefix == 'v' && Reg.Num < 32)
+ Reg.Group = RegV;
+ else if (Prefix == 'a' && Reg.Num < 16)
+ Reg.Group = RegAR;
+ else if (Prefix == 'c' && Reg.Num < 16)
+ Reg.Group = RegCR;
+ else
+ return Error(Reg.StartLoc, "invalid register");
+
+ Reg.EndLoc = Parser.getTok().getLoc();
+ Parser.Lex();
+ return false;
+}
+
+// Parse a register of group Group. If Regs is nonnull, use it to map
+// the raw register number to LLVM numbering, with zero entries
+// indicating an invalid register. IsAddress says whether the
+// register appears in an address context. Allow FP Group if expecting
+// RegV Group, since the f-prefix yields the FP group even when used
+// with vector instructions.
+bool SystemZAsmParser::parseRegister(Register &Reg, RegisterGroup Group,
+ const unsigned *Regs, bool IsAddress) {
+ if (parseRegister(Reg))
+ return true;
+ if (Reg.Group != Group && !(Reg.Group == RegFP && Group == RegV))
+ return Error(Reg.StartLoc, "invalid operand for instruction");
+ if (Regs && Regs[Reg.Num] == 0)
+ return Error(Reg.StartLoc, "invalid register pair");
+ if (Reg.Num == 0 && IsAddress)
+ return Error(Reg.StartLoc, "%r0 used in an address");
+ if (Regs)
+ Reg.Num = Regs[Reg.Num];
+ return false;
+}
+
+// Parse a register and add it to Operands. The other arguments are as above.
+OperandMatchResultTy
+SystemZAsmParser::parseRegister(OperandVector &Operands, RegisterGroup Group,
+ const unsigned *Regs, RegisterKind Kind) {
+ if (Parser.getTok().isNot(AsmToken::Percent))
+ return MatchOperand_NoMatch;
+
+ Register Reg;
+ bool IsAddress = (Kind == ADDR32Reg || Kind == ADDR64Reg);
+ if (parseRegister(Reg, Group, Regs, IsAddress))
+ return MatchOperand_ParseFail;
+
+ Operands.push_back(SystemZOperand::createReg(Kind, Reg.Num,
+ Reg.StartLoc, Reg.EndLoc));
+ return MatchOperand_Success;
+}
+
+// Parse any type of register (including integers) and add it to Operands.
+OperandMatchResultTy
+SystemZAsmParser::parseAnyRegister(OperandVector &Operands) {
+ // Handle integer values.
+ if (Parser.getTok().is(AsmToken::Integer)) {
+ const MCExpr *Register;
+ SMLoc StartLoc = Parser.getTok().getLoc();
+ if (Parser.parseExpression(Register))
+ return MatchOperand_ParseFail;
+
+ if (auto *CE = dyn_cast<MCConstantExpr>(Register)) {
+ int64_t Value = CE->getValue();
+ if (Value < 0 || Value > 15) {
+ Error(StartLoc, "invalid register");
+ return MatchOperand_ParseFail;
+ }
+ }
+
+ SMLoc EndLoc =
+ SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
+ Operands.push_back(SystemZOperand::createImm(Register, StartLoc, EndLoc));
+ }
+ else {
+ Register Reg;
+ if (parseRegister(Reg))
+ return MatchOperand_ParseFail;
+
+ // Map to the correct register kind.
+ RegisterKind Kind;
+ unsigned RegNo;
+ if (Reg.Group == RegGR) {
+ Kind = GR64Reg;
+ RegNo = SystemZMC::GR64Regs[Reg.Num];
+ }
+ else if (Reg.Group == RegFP) {
+ Kind = FP64Reg;
+ RegNo = SystemZMC::FP64Regs[Reg.Num];
+ }
+ else if (Reg.Group == RegV) {
+ Kind = VR128Reg;
+ RegNo = SystemZMC::VR128Regs[Reg.Num];
+ }
+ else if (Reg.Group == RegAR) {
+ Kind = AR32Reg;
+ RegNo = SystemZMC::AR32Regs[Reg.Num];
+ }
+ else if (Reg.Group == RegCR) {
+ Kind = CR64Reg;
+ RegNo = SystemZMC::CR64Regs[Reg.Num];
+ }
+ else {
+ return MatchOperand_ParseFail;
+ }
+
+ Operands.push_back(SystemZOperand::createReg(Kind, RegNo,
+ Reg.StartLoc, Reg.EndLoc));
+ }
+ return MatchOperand_Success;
+}
+
+// Parse a memory operand into Reg1, Reg2, Disp, and Length.
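+// For example, "100(%r2,%r3)" gives Disp 100, Reg1 %r2 and Reg2 %r3,
+// while "0(8,%r3)" gives Disp 0, Length 8 and Reg2 %r3.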
+bool SystemZAsmParser::parseAddress(bool &HaveReg1, Register &Reg1,
+ bool &HaveReg2, Register &Reg2,
+ const MCExpr *&Disp,
+ const MCExpr *&Length) {
+ // Parse the displacement, which must always be present.
+ if (getParser().parseExpression(Disp))
+ return true;
+
+ // Parse the optional base and index.
+ HaveReg1 = false;
+ HaveReg2 = false;
+ Length = nullptr;
+ if (getLexer().is(AsmToken::LParen)) {
+ Parser.Lex();
+
+ if (getLexer().is(AsmToken::Percent)) {
+ // Parse the first register.
+ HaveReg1 = true;
+ if (parseRegister(Reg1))
+ return true;
+ } else {
+ // Parse the length.
+ if (getParser().parseExpression(Length))
+ return true;
+ }
+
+ // Check whether there's a second register.
+ if (getLexer().is(AsmToken::Comma)) {
+ Parser.Lex();
+ HaveReg2 = true;
+ if (parseRegister(Reg2))
+ return true;
+ }
+
+ // Consume the closing parenthesis.
+ if (getLexer().isNot(AsmToken::RParen))
+ return Error(Parser.getTok().getLoc(), "unexpected token in address");
+ Parser.Lex();
+ }
+ return false;
+}
+
+// Verify that Reg is a valid address register (base or index).
+bool
+SystemZAsmParser::parseAddressRegister(Register &Reg) {
+ if (Reg.Group == RegV) {
+ Error(Reg.StartLoc, "invalid use of vector addressing");
+ return true;
+ } else if (Reg.Group != RegGR) {
+ Error(Reg.StartLoc, "invalid address register");
+ return true;
+ } else if (Reg.Num == 0) {
+ Error(Reg.StartLoc, "%r0 used in an address");
+ return true;
+ }
+ return false;
+}
+
+// Parse a memory operand and add it to Operands. The other arguments
+// are as above.
+OperandMatchResultTy
+SystemZAsmParser::parseAddress(OperandVector &Operands, MemoryKind MemKind,
+ const unsigned *Regs, RegisterKind RegKind) {
+ SMLoc StartLoc = Parser.getTok().getLoc();
+ unsigned Base = 0, Index = 0, LengthReg = 0;
+ Register Reg1, Reg2;
+ bool HaveReg1, HaveReg2;
+ const MCExpr *Disp;
+ const MCExpr *Length;
+ if (parseAddress(HaveReg1, Reg1, HaveReg2, Reg2, Disp, Length))
+ return MatchOperand_ParseFail;
+
+ switch (MemKind) {
+ case BDMem:
+ // If we have Reg1, it must be an address register.
+ if (HaveReg1) {
+ if (parseAddressRegister(Reg1))
+ return MatchOperand_ParseFail;
+ Base = Regs[Reg1.Num];
+ }
+ // There must be no Reg2 or length.
+ if (Length) {
+ Error(StartLoc, "invalid use of length addressing");
+ return MatchOperand_ParseFail;
+ }
+ if (HaveReg2) {
+ Error(StartLoc, "invalid use of indexed addressing");
+ return MatchOperand_ParseFail;
+ }
+ break;
+ case BDXMem:
+ // If we have Reg1, it must be an address register.
+ if (HaveReg1) {
+ if (parseAddressRegister(Reg1))
+ return MatchOperand_ParseFail;
+ // If there are two registers, the first one is the index and the
+ // second is the base.
+ if (HaveReg2)
+ Index = Regs[Reg1.Num];
+ else
+ Base = Regs[Reg1.Num];
+ }
+ // If we have Reg2, it must be an address register.
+ if (HaveReg2) {
+ if (parseAddressRegister(Reg2))
+ return MatchOperand_ParseFail;
+ Base = Regs[Reg2.Num];
+ }
+ // There must be no length.
+ if (Length) {
+ Error(StartLoc, "invalid use of length addressing");
+ return MatchOperand_ParseFail;
+ }
+ break;
+ case BDLMem:
+ // If we have Reg2, it must be an address register.
+ if (HaveReg2) {
+ if (parseAddressRegister(Reg2))
+ return MatchOperand_ParseFail;
+ Base = Regs[Reg2.Num];
+ }
+ // We cannot support base+index addressing.
+ if (HaveReg1 && HaveReg2) {
+ Error(StartLoc, "invalid use of indexed addressing");
+ return MatchOperand_ParseFail;
+ }
+ // We must have a length.
+ if (!Length) {
+ Error(StartLoc, "missing length in address");
+ return MatchOperand_ParseFail;
+ }
+ break;
+ case BDRMem:
+ // We must have Reg1, and it must be a GPR.
+ if (!HaveReg1 || Reg1.Group != RegGR) {
+ Error(StartLoc, "invalid operand for instruction");
+ return MatchOperand_ParseFail;
+ }
+ LengthReg = SystemZMC::GR64Regs[Reg1.Num];
+ // If we have Reg2, it must be an address register.
+ if (HaveReg2) {
+ if (parseAddressRegister(Reg2))
+ return MatchOperand_ParseFail;
+ Base = Regs[Reg2.Num];
+ }
+ // There must be no length.
+ if (Length) {
+ Error(StartLoc, "invalid use of length addressing");
+ return MatchOperand_ParseFail;
+ }
+ break;
+ case BDVMem:
+ // We must have Reg1, and it must be a vector register.
+ if (!HaveReg1 || Reg1.Group != RegV) {
+ Error(StartLoc, "vector index required in address");
+ return MatchOperand_ParseFail;
+ }
+ Index = SystemZMC::VR128Regs[Reg1.Num];
+ // If we have Reg2, it must be an address register.
+ if (HaveReg2) {
+ if (parseAddressRegister(Reg2))
+ return MatchOperand_ParseFail;
+ Base = Regs[Reg2.Num];
+ }
+ // There must be no length.
+ if (Length) {
+ Error(StartLoc, "invalid use of length addressing");
+ return MatchOperand_ParseFail;
+ }
+ break;
+ }
+
+ SMLoc EndLoc =
+ SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(SystemZOperand::createMem(MemKind, RegKind, Base, Disp,
+ Index, Length, LengthReg,
+ StartLoc, EndLoc));
+ return MatchOperand_Success;
+}
+
+bool SystemZAsmParser::ParseDirective(AsmToken DirectiveID) {
+ StringRef IDVal = DirectiveID.getIdentifier();
+
+ if (IDVal == ".insn")
+ return ParseDirectiveInsn(DirectiveID.getLoc());
+
+ return true;
+}
+
+/// ParseDirectiveInsn
+/// ::= .insn [ format, encoding, (operands (, operands)*) ]
+bool SystemZAsmParser::ParseDirectiveInsn(SMLoc L) {
+ MCAsmParser &Parser = getParser();
+
+ // Expect instruction format as identifier.
+ StringRef Format;
+ SMLoc ErrorLoc = Parser.getTok().getLoc();
+ if (Parser.parseIdentifier(Format))
+ return Error(ErrorLoc, "expected instruction format");
+
+ SmallVector<std::unique_ptr<MCParsedAsmOperand>, 8> Operands;
+
+ // Find entry for this format in InsnMatchTable.
+ auto EntryRange =
+ std::equal_range(std::begin(InsnMatchTable), std::end(InsnMatchTable),
+ Format, CompareInsn());
+
+ // If first == second, couldn't find a match in the table.
+ if (EntryRange.first == EntryRange.second)
+ return Error(ErrorLoc, "unrecognized format");
+
+ struct InsnMatchEntry *Entry = EntryRange.first;
+
+ // Format should match from equal_range.
+ assert(Entry->Format == Format);
+
+ // Parse the following operands using the table's information.
+ for (int i = 0; i < Entry->NumOperands; i++) {
+ MatchClassKind Kind = Entry->OperandKinds[i];
+
+ SMLoc StartLoc = Parser.getTok().getLoc();
+
+ // Always expect commas as separators for operands.
+ if (getLexer().isNot(AsmToken::Comma))
+ return Error(StartLoc, "unexpected token in directive");
+ Lex();
+
+ // Parse operands.
+ OperandMatchResultTy ResTy;
+ if (Kind == MCK_AnyReg)
+ ResTy = parseAnyReg(Operands);
+ else if (Kind == MCK_BDXAddr64Disp12 || Kind == MCK_BDXAddr64Disp20)
+ ResTy = parseBDXAddr64(Operands);
+ else if (Kind == MCK_BDAddr64Disp12 || Kind == MCK_BDAddr64Disp20)
+ ResTy = parseBDAddr64(Operands);
+ else if (Kind == MCK_PCRel32)
+ ResTy = parsePCRel32(Operands);
+ else if (Kind == MCK_PCRel16)
+ ResTy = parsePCRel16(Operands);
+ else {
+ // Only remaining operand kind is an immediate.
+ const MCExpr *Expr;
+ SMLoc StartLoc = Parser.getTok().getLoc();
+
+ // Expect immediate expression.
+ if (Parser.parseExpression(Expr))
+ return Error(StartLoc, "unexpected token in directive");
+
+ SMLoc EndLoc =
+ SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
+ Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc));
+ ResTy = MatchOperand_Success;
+ }
+
+ if (ResTy != MatchOperand_Success)
+ return true;
+ }
+
+ // Build the instruction with the parsed operands.
+ MCInst Inst = MCInstBuilder(Entry->Opcode);
+
+ for (size_t i = 0; i < Operands.size(); i++) {
+ MCParsedAsmOperand &Operand = *Operands[i];
+ MatchClassKind Kind = Entry->OperandKinds[i];
+
+ // Verify operand.
+ unsigned Res = validateOperandClass(Operand, Kind);
+ if (Res != Match_Success)
+ return Error(Operand.getStartLoc(), "unexpected operand type");
+
+ // Add operands to instruction.
+ SystemZOperand &ZOperand = static_cast<SystemZOperand &>(Operand);
+ if (ZOperand.isReg())
+ ZOperand.addRegOperands(Inst, 1);
+ else if (ZOperand.isMem(BDMem))
+ ZOperand.addBDAddrOperands(Inst, 2);
+ else if (ZOperand.isMem(BDXMem))
+ ZOperand.addBDXAddrOperands(Inst, 3);
+ else if (ZOperand.isImm())
+ ZOperand.addImmOperands(Inst, 1);
+ else
+ llvm_unreachable("unexpected operand type");
+ }
+
+ // Emit as a regular instruction.
+ Parser.getStreamer().EmitInstruction(Inst, getSTI());
+
+ return false;
+}
+
+bool SystemZAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
+ Register Reg;
+ if (parseRegister(Reg))
+ return true;
+ if (Reg.Group == RegGR)
+ RegNo = SystemZMC::GR64Regs[Reg.Num];
+ else if (Reg.Group == RegFP)
+ RegNo = SystemZMC::FP64Regs[Reg.Num];
+ else if (Reg.Group == RegV)
+ RegNo = SystemZMC::VR128Regs[Reg.Num];
+ else if (Reg.Group == RegAR)
+ RegNo = SystemZMC::AR32Regs[Reg.Num];
+ else if (Reg.Group == RegCR)
+ RegNo = SystemZMC::CR64Regs[Reg.Num];
+ StartLoc = Reg.StartLoc;
+ EndLoc = Reg.EndLoc;
+ return false;
+}
+
+bool SystemZAsmParser::ParseInstruction(ParseInstructionInfo &Info,
+ StringRef Name, SMLoc NameLoc,
+ OperandVector &Operands) {
+ Operands.push_back(SystemZOperand::createToken(Name, NameLoc));
+
+ // Read the remaining operands.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ // Read the first operand.
+ if (parseOperand(Operands, Name)) {
+ return true;
+ }
+
+ // Read any subsequent operands.
+ while (getLexer().is(AsmToken::Comma)) {
+ Parser.Lex();
+ if (parseOperand(Operands, Name)) {
+ return true;
+ }
+ }
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ SMLoc Loc = getLexer().getLoc();
+ return Error(Loc, "unexpected token in argument list");
+ }
+ }
+
+ // Consume the EndOfStatement.
+ Parser.Lex();
+ return false;
+}
+
+bool SystemZAsmParser::parseOperand(OperandVector &Operands,
+ StringRef Mnemonic) {
+ // Check if the current operand has a custom associated parser; if so, try
+ // custom parsing the operand, falling back to the general approach
+ // otherwise. Force all features to be available during the operand check,
+ // or else we will fail to find the custom parser, and then we will later
+ // get an InvalidOperand error instead of a MissingFeature error.
+ uint64_t AvailableFeatures = getAvailableFeatures();
+ setAvailableFeatures(~(uint64_t)0);
+ OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
+ setAvailableFeatures(AvailableFeatures);
+ if (ResTy == MatchOperand_Success)
+ return false;
+
+ // If there wasn't a custom match, try the generic matcher below. Otherwise,
+ // there was a match, but an error occurred, in which case, just return that
+ // the operand parsing failed.
+ if (ResTy == MatchOperand_ParseFail)
+ return true;
+
+ // Check for a register. All real register operands should have used
+ // a context-dependent parse routine, which gives the required register
+ // class. The code is here to mop up other cases, like those where
+ // the instruction isn't recognized.
+ if (Parser.getTok().is(AsmToken::Percent)) {
+ Register Reg;
+ if (parseRegister(Reg))
+ return true;
+ Operands.push_back(SystemZOperand::createInvalid(Reg.StartLoc, Reg.EndLoc));
+ return false;
+ }
+
+ // The only other type of operand is an immediate or address. As above,
+ // real address operands should have used a context-dependent parse routine,
+ // so we treat any plain expression as an immediate.
+ SMLoc StartLoc = Parser.getTok().getLoc();
+ Register Reg1, Reg2;
+ bool HaveReg1, HaveReg2;
+ const MCExpr *Expr;
+ const MCExpr *Length;
+ if (parseAddress(HaveReg1, Reg1, HaveReg2, Reg2, Expr, Length))
+ return true;
+ // If the register combination is not valid for any instruction, reject it.
+ // Otherwise, fall back to reporting an unrecognized instruction.
+ if (HaveReg1 && Reg1.Group != RegGR && Reg1.Group != RegV
+ && parseAddressRegister(Reg1))
+ return true;
+ if (HaveReg2 && parseAddressRegister(Reg2))
+ return true;
+
+ SMLoc EndLoc =
+ SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ if (HaveReg1 || HaveReg2 || Length)
+ Operands.push_back(SystemZOperand::createInvalid(StartLoc, EndLoc));
+ else
+ Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc));
+ return false;
+}
+
+bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands,
+ MCStreamer &Out,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm) {
+ MCInst Inst;
+ unsigned MatchResult;
+
+ MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
+ MatchingInlineAsm);
+ switch (MatchResult) {
+ case Match_Success:
+ Inst.setLoc(IDLoc);
+ Out.EmitInstruction(Inst, getSTI());
+ return false;
+
+ case Match_MissingFeature: {
+ assert(ErrorInfo && "Unknown missing feature!");
+ // Special case the error message for the very common case where only
+ // a single subtarget feature is missing
+ std::string Msg = "instruction requires:";
+ uint64_t Mask = 1;
+ for (unsigned I = 0; I < sizeof(ErrorInfo) * 8 - 1; ++I) {
+ if (ErrorInfo & Mask) {
+ Msg += " ";
+ Msg += getSubtargetFeatureName(ErrorInfo & Mask);
+ }
+ Mask <<= 1;
+ }
+ return Error(IDLoc, Msg);
+ }
+
+ case Match_InvalidOperand: {
+ SMLoc ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0ULL) {
+ if (ErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
+
+ ErrorLoc = ((SystemZOperand &)*Operands[ErrorInfo]).getStartLoc();
+ if (ErrorLoc == SMLoc())
+ ErrorLoc = IDLoc;
+ }
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+
+ case Match_MnemonicFail:
+ return Error(IDLoc, "invalid instruction");
+ }
+
+ llvm_unreachable("Unexpected match type");
+}
+
+OperandMatchResultTy
+SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal,
+ int64_t MaxVal, bool AllowTLS) {
+ MCContext &Ctx = getContext();
+ MCStreamer &Out = getStreamer();
+ const MCExpr *Expr;
+ SMLoc StartLoc = Parser.getTok().getLoc();
+ if (getParser().parseExpression(Expr))
+ return MatchOperand_NoMatch;
+
+ // For consistency with the GNU assembler, treat immediates as offsets
+ // from ".".
+ if (auto *CE = dyn_cast<MCConstantExpr>(Expr)) {
+ int64_t Value = CE->getValue();
+ if ((Value & 1) || Value < MinVal || Value > MaxVal) {
+ Error(StartLoc, "offset out of range");
+ return MatchOperand_ParseFail;
+ }
+ MCSymbol *Sym = Ctx.createTempSymbol();
+ Out.EmitLabel(Sym);
+ const MCExpr *Base = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None,
+ Ctx);
+ Expr = Value == 0 ? Base : MCBinaryExpr::createAdd(Base, Expr, Ctx);
+ }
+
+ // Optionally match :tls_gdcall: or :tls_ldcall: followed by a TLS symbol.
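+ // For example: "brasl %r14,__tls_get_offset@PLT:tls_gdcall:sym".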
+ const MCExpr *Sym = nullptr;
+ if (AllowTLS && getLexer().is(AsmToken::Colon)) {
+ Parser.Lex();
+
+ if (Parser.getTok().isNot(AsmToken::Identifier)) {
+ Error(Parser.getTok().getLoc(), "unexpected token");
+ return MatchOperand_ParseFail;
+ }
+
+ MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None;
+ StringRef Name = Parser.getTok().getString();
+ if (Name == "tls_gdcall")
+ Kind = MCSymbolRefExpr::VK_TLSGD;
+ else if (Name == "tls_ldcall")
+ Kind = MCSymbolRefExpr::VK_TLSLDM;
+ else {
+ Error(Parser.getTok().getLoc(), "unknown TLS tag");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex();
+
+ if (Parser.getTok().isNot(AsmToken::Colon)) {
+ Error(Parser.getTok().getLoc(), "unexpected token");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex();
+
+ if (Parser.getTok().isNot(AsmToken::Identifier)) {
+ Error(Parser.getTok().getLoc(), "unexpected token");
+ return MatchOperand_ParseFail;
+ }
+
+ StringRef Identifier = Parser.getTok().getString();
+ Sym = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(Identifier),
+ Kind, Ctx);
+ Parser.Lex();
+ }
+
+ SMLoc EndLoc =
+ SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
+ if (AllowTLS)
+ Operands.push_back(SystemZOperand::createImmTLS(Expr, Sym,
+ StartLoc, EndLoc));
+ else
+ Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc));
+
+ return MatchOperand_Success;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeSystemZAsmParser() {
+ RegisterMCAsmParser<SystemZAsmParser> X(getTheSystemZTarget());
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/contrib/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
new file mode 100644
index 000000000000..8903b57ffd0b
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
@@ -0,0 +1,482 @@
+//===-- SystemZDisassembler.cpp - Disassembler for SystemZ ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "SystemZ.h"
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TargetRegistry.h"
+#include <cassert>
+#include <cstdint>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "systemz-disassembler"
+
+typedef MCDisassembler::DecodeStatus DecodeStatus;
+
+namespace {
+
+class SystemZDisassembler : public MCDisassembler {
+public:
+ SystemZDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
+ : MCDisassembler(STI, Ctx) {}
+ ~SystemZDisassembler() override = default;
+
+ DecodeStatus getInstruction(MCInst &instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const override;
+};
+
+} // end anonymous namespace
+
+static MCDisassembler *createSystemZDisassembler(const Target &T,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new SystemZDisassembler(STI, Ctx);
+}
+
+extern "C" void LLVMInitializeSystemZDisassembler() {
+ // Register the disassembler.
+ TargetRegistry::RegisterMCDisassembler(getTheSystemZTarget(),
+ createSystemZDisassembler);
+}
+
+/// tryAddingSymbolicOperand - tries to add a symbolic operand in place of the
+/// immediate Value in the MCInst.
+///
+/// @param Value - The immediate Value; any PC adjustment has already been
+/// made by the caller.
+/// @param isBranch - If the instruction is a branch instruction
+/// @param Address - The starting address of the instruction
+/// @param Offset - The byte offset to this immediate in the instruction
+/// @param Width - The byte width of this immediate in the instruction
+///
+/// If the getOpInfo() function was set when setupForSymbolicDisassembly() was
+/// called then that function is called to get any symbolic information for the
+/// immediate in the instruction using the Address, Offset and Width. If that
+/// returns non-zero then the symbolic information it returns is used to create
+/// an MCExpr and that is added as an operand to the MCInst. If getOpInfo()
+/// returns zero and isBranch is true then a symbol look up for immediate Value
+/// is done and if a symbol is found an MCExpr is created with that, else
+/// an MCExpr with the immediate Value is created. This function returns true
+/// if it adds an operand to the MCInst and false otherwise.
+static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch,
+ uint64_t Address, uint64_t Offset,
+ uint64_t Width, MCInst &MI,
+ const void *Decoder) {
+ const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
+ return Dis->tryAddingSymbolicOperand(MI, Value, Address, isBranch,
+ Offset, Width);
+}
+
+static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo,
+ const unsigned *Regs, unsigned Size) {
+ assert(RegNo < Size && "Invalid register");
+ RegNo = Regs[RegNo];
+ if (RegNo == 0)
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createReg(RegNo));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeGR32BitRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::GR32Regs, 16);
+}
+
+static DecodeStatus DecodeGRH32BitRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::GRH32Regs, 16);
+}
+
+static DecodeStatus DecodeGR64BitRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs, 16);
+}
+
+static DecodeStatus DecodeGR128BitRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::GR128Regs, 16);
+}
+
+static DecodeStatus DecodeADDR64BitRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs, 16);
+}
+
+static DecodeStatus DecodeFP32BitRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::FP32Regs, 16);
+}
+
+static DecodeStatus DecodeFP64BitRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::FP64Regs, 16);
+}
+
+static DecodeStatus DecodeFP128BitRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::FP128Regs, 16);
+}
+
+static DecodeStatus DecodeVR32BitRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::VR32Regs, 32);
+}
+
+static DecodeStatus DecodeVR64BitRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::VR64Regs, 32);
+}
+
+static DecodeStatus DecodeVR128BitRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::VR128Regs, 32);
+}
+
+static DecodeStatus DecodeAR32BitRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::AR32Regs, 16);
+}
+
+static DecodeStatus DecodeCR64BitRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::CR64Regs, 16);
+}
+
+template<unsigned N>
+static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm) {
+ if (!isUInt<N>(Imm))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(Imm));
+ return MCDisassembler::Success;
+}
+
+template<unsigned N>
+static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm) {
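+ // The encoded field is a raw unsigned N-bit value; range-check it before
+ // sign-extending to recover the signed immediate.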
+ if (!isUInt<N>(Imm))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(SignExtend64<N>(Imm)));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeU1ImmOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address, const void *Decoder) {
+ return decodeUImmOperand<1>(Inst, Imm);
+}
+
+static DecodeStatus decodeU2ImmOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address, const void *Decoder) {
+ return decodeUImmOperand<2>(Inst, Imm);
+}
+
+static DecodeStatus decodeU3ImmOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address, const void *Decoder) {
+ return decodeUImmOperand<3>(Inst, Imm);
+}
+
+static DecodeStatus decodeU4ImmOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address, const void *Decoder) {
+ return decodeUImmOperand<4>(Inst, Imm);
+}
+
+static DecodeStatus decodeU6ImmOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address, const void *Decoder) {
+ return decodeUImmOperand<6>(Inst, Imm);
+}
+
+static DecodeStatus decodeU8ImmOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address, const void *Decoder) {
+ return decodeUImmOperand<8>(Inst, Imm);
+}
+
+static DecodeStatus decodeU12ImmOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address, const void *Decoder) {
+ return decodeUImmOperand<12>(Inst, Imm);
+}
+
+static DecodeStatus decodeU16ImmOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address, const void *Decoder) {
+ return decodeUImmOperand<16>(Inst, Imm);
+}
+
+static DecodeStatus decodeU32ImmOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address, const void *Decoder) {
+ return decodeUImmOperand<32>(Inst, Imm);
+}
+
+static DecodeStatus decodeS8ImmOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address, const void *Decoder) {
+ return decodeSImmOperand<8>(Inst, Imm);
+}
+
+static DecodeStatus decodeS16ImmOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address, const void *Decoder) {
+ return decodeSImmOperand<16>(Inst, Imm);
+}
+
+static DecodeStatus decodeS32ImmOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address, const void *Decoder) {
+ return decodeSImmOperand<32>(Inst, Imm);
+}
+
+template<unsigned N>
+static DecodeStatus decodePCDBLOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address,
+ bool isBranch,
+ const void *Decoder) {
+ assert(isUInt<N>(Imm) && "Invalid PC-relative offset");
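+ // The encoded immediate is a signed halfword count relative to the
+ // instruction address, so scale by 2 to get a byte address.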
+ uint64_t Value = SignExtend64<N>(Imm) * 2 + Address;
+
+ if (!tryAddingSymbolicOperand(Value, isBranch, Address, 2, N / 8,
+ Inst, Decoder))
+ Inst.addOperand(MCOperand::createImm(Value));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodePC12DBLBranchOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodePCDBLOperand<12>(Inst, Imm, Address, true, Decoder);
+}
+
+static DecodeStatus decodePC16DBLBranchOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodePCDBLOperand<16>(Inst, Imm, Address, true, Decoder);
+}
+
+static DecodeStatus decodePC24DBLBranchOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodePCDBLOperand<24>(Inst, Imm, Address, true, Decoder);
+}
+
+static DecodeStatus decodePC32DBLBranchOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodePCDBLOperand<32>(Inst, Imm, Address, true, Decoder);
+}
+
+static DecodeStatus decodePC32DBLOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodePCDBLOperand<32>(Inst, Imm, Address, false, Decoder);
+}
+
+static DecodeStatus decodeBDAddr12Operand(MCInst &Inst, uint64_t Field,
+ const unsigned *Regs) {
+ uint64_t Base = Field >> 12;
+ uint64_t Disp = Field & 0xfff;
+ assert(Base < 16 && "Invalid BDAddr12");
+ Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base]));
+ Inst.addOperand(MCOperand::createImm(Disp));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeBDAddr20Operand(MCInst &Inst, uint64_t Field,
+ const unsigned *Regs) {
+ uint64_t Base = Field >> 20;
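+ // The raw field is B(4) | DL(12) | DH(8); reassemble the 20-bit
+ // displacement as DH:DL before sign-extending.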
+ uint64_t Disp = ((Field << 12) & 0xff000) | ((Field >> 8) & 0xfff);
+ assert(Base < 16 && "Invalid BDAddr20");
+ Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base]));
+ Inst.addOperand(MCOperand::createImm(SignExtend64<20>(Disp)));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeBDXAddr12Operand(MCInst &Inst, uint64_t Field,
+ const unsigned *Regs) {
+ uint64_t Index = Field >> 16;
+ uint64_t Base = (Field >> 12) & 0xf;
+ uint64_t Disp = Field & 0xfff;
+ assert(Index < 16 && "Invalid BDXAddr12");
+ Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base]));
+ Inst.addOperand(MCOperand::createImm(Disp));
+ Inst.addOperand(MCOperand::createReg(Index == 0 ? 0 : Regs[Index]));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeBDXAddr20Operand(MCInst &Inst, uint64_t Field,
+ const unsigned *Regs) {
+ uint64_t Index = Field >> 24;
+ uint64_t Base = (Field >> 20) & 0xf;
+ uint64_t Disp = ((Field & 0xfff00) >> 8) | ((Field & 0xff) << 12);
+ assert(Index < 16 && "Invalid BDXAddr20");
+ Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base]));
+ Inst.addOperand(MCOperand::createImm(SignExtend64<20>(Disp)));
+ Inst.addOperand(MCOperand::createReg(Index == 0 ? 0 : Regs[Index]));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeBDLAddr12Len4Operand(MCInst &Inst, uint64_t Field,
+ const unsigned *Regs) {
+ uint64_t Length = Field >> 16;
+ uint64_t Base = (Field >> 12) & 0xf;
+ uint64_t Disp = Field & 0xfff;
+ assert(Length < 16 && "Invalid BDLAddr12Len4");
+ Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base]));
+ Inst.addOperand(MCOperand::createImm(Disp));
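+ // The encoding stores the length minus one.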
+ Inst.addOperand(MCOperand::createImm(Length + 1));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeBDLAddr12Len8Operand(MCInst &Inst, uint64_t Field,
+ const unsigned *Regs) {
+ uint64_t Length = Field >> 16;
+ uint64_t Base = (Field >> 12) & 0xf;
+ uint64_t Disp = Field & 0xfff;
+ assert(Length < 256 && "Invalid BDLAddr12Len8");
+ Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base]));
+ Inst.addOperand(MCOperand::createImm(Disp));
+ Inst.addOperand(MCOperand::createImm(Length + 1));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeBDRAddr12Operand(MCInst &Inst, uint64_t Field,
+ const unsigned *Regs) {
+ uint64_t Length = Field >> 16;
+ uint64_t Base = (Field >> 12) & 0xf;
+ uint64_t Disp = Field & 0xfff;
+ assert(Length < 16 && "Invalid BDRAddr12");
+ Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base]));
+ Inst.addOperand(MCOperand::createImm(Disp));
+ Inst.addOperand(MCOperand::createReg(Regs[Length]));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeBDVAddr12Operand(MCInst &Inst, uint64_t Field,
+ const unsigned *Regs) {
+ uint64_t Index = Field >> 16;
+ uint64_t Base = (Field >> 12) & 0xf;
+ uint64_t Disp = Field & 0xfff;
+ assert(Index < 32 && "Invalid BDVAddr12");
+ Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base]));
+ Inst.addOperand(MCOperand::createImm(Disp));
+ Inst.addOperand(MCOperand::createReg(SystemZMC::VR128Regs[Index]));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeBDAddr32Disp12Operand(MCInst &Inst, uint64_t Field,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeBDAddr12Operand(Inst, Field, SystemZMC::GR32Regs);
+}
+
+static DecodeStatus decodeBDAddr32Disp20Operand(MCInst &Inst, uint64_t Field,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeBDAddr20Operand(Inst, Field, SystemZMC::GR32Regs);
+}
+
+static DecodeStatus decodeBDAddr64Disp12Operand(MCInst &Inst, uint64_t Field,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeBDAddr12Operand(Inst, Field, SystemZMC::GR64Regs);
+}
+
+static DecodeStatus decodeBDAddr64Disp20Operand(MCInst &Inst, uint64_t Field,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeBDAddr20Operand(Inst, Field, SystemZMC::GR64Regs);
+}
+
+static DecodeStatus decodeBDXAddr64Disp12Operand(MCInst &Inst, uint64_t Field,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeBDXAddr12Operand(Inst, Field, SystemZMC::GR64Regs);
+}
+
+static DecodeStatus decodeBDXAddr64Disp20Operand(MCInst &Inst, uint64_t Field,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeBDXAddr20Operand(Inst, Field, SystemZMC::GR64Regs);
+}
+
+static DecodeStatus decodeBDLAddr64Disp12Len4Operand(MCInst &Inst,
+ uint64_t Field,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeBDLAddr12Len4Operand(Inst, Field, SystemZMC::GR64Regs);
+}
+
+static DecodeStatus decodeBDLAddr64Disp12Len8Operand(MCInst &Inst,
+ uint64_t Field,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeBDLAddr12Len8Operand(Inst, Field, SystemZMC::GR64Regs);
+}
+
+static DecodeStatus decodeBDRAddr64Disp12Operand(MCInst &Inst,
+ uint64_t Field,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeBDRAddr12Operand(Inst, Field, SystemZMC::GR64Regs);
+}
+
+static DecodeStatus decodeBDVAddr64Disp12Operand(MCInst &Inst, uint64_t Field,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeBDVAddr12Operand(Inst, Field, SystemZMC::GR64Regs);
+}
+
+#include "SystemZGenDisassemblerTables.inc"
+
+DecodeStatus SystemZDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &OS,
+ raw_ostream &CS) const {
+ // Get the first two bytes of the instruction.
+ Size = 0;
+ if (Bytes.size() < 2)
+ return MCDisassembler::Fail;
+
+ // The top 2 bits of the first byte specify the size.
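+ // 0x00-0x3f: 2 bytes, 0x40-0xbf: 4 bytes, 0xc0-0xff: 6 bytes.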
+ const uint8_t *Table;
+ if (Bytes[0] < 0x40) {
+ Size = 2;
+ Table = DecoderTable16;
+ } else if (Bytes[0] < 0xc0) {
+ Size = 4;
+ Table = DecoderTable32;
+ } else {
+ Size = 6;
+ Table = DecoderTable48;
+ }
+
+ // Read any remaining bytes.
+ if (Bytes.size() < Size)
+ return MCDisassembler::Fail;
+
+ // Construct the instruction.
+ uint64_t Inst = 0;
+ for (uint64_t I = 0; I < Size; ++I)
+ Inst = (Inst << 8) | Bytes[I];
+
+ return decodeInstruction(Table, MI, Inst, Address, this, STI);
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
new file mode 100644
index 000000000000..6cd12e13e220
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
@@ -0,0 +1,234 @@
+//===- SystemZInstPrinter.cpp - Convert SystemZ MCInst to assembly syntax -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZInstPrinter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+#include "SystemZGenAsmWriter.inc"
+
+void SystemZInstPrinter::printAddress(unsigned Base, int64_t Disp,
+ unsigned Index, raw_ostream &O) {
+ O << Disp;
+ if (Base || Index) {
+ O << '(';
+ if (Index) {
+ O << '%' << getRegisterName(Index);
+ if (Base)
+ O << ',';
+ }
+ if (Base)
+ O << '%' << getRegisterName(Base);
+ O << ')';
+ }
+}
+
+void SystemZInstPrinter::printOperand(const MCOperand &MO, const MCAsmInfo *MAI,
+ raw_ostream &O) {
+ if (MO.isReg())
+ O << '%' << getRegisterName(MO.getReg());
+ else if (MO.isImm())
+ O << MO.getImm();
+ else if (MO.isExpr())
+ MO.getExpr()->print(O, MAI);
+ else
+ llvm_unreachable("Invalid operand");
+}
+
+void SystemZInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot,
+ const MCSubtargetInfo &STI) {
+ printInstruction(MI, O);
+ printAnnotation(O, Annot);
+}
+
+void SystemZInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const {
+ O << '%' << getRegisterName(RegNo);
+}
+
+template <unsigned N>
+static void printUImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {
+ int64_t Value = MI->getOperand(OpNum).getImm();
+ assert(isUInt<N>(Value) && "Invalid uimm argument");
+ O << Value;
+}
+
+template <unsigned N>
+static void printSImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {
+ int64_t Value = MI->getOperand(OpNum).getImm();
+ assert(isInt<N>(Value) && "Invalid simm argument");
+ O << Value;
+}
+
+void SystemZInstPrinter::printU1ImmOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ printUImmOperand<1>(MI, OpNum, O);
+}
+
+void SystemZInstPrinter::printU2ImmOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ printUImmOperand<2>(MI, OpNum, O);
+}
+
+void SystemZInstPrinter::printU3ImmOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ printUImmOperand<3>(MI, OpNum, O);
+}
+
+void SystemZInstPrinter::printU4ImmOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ printUImmOperand<4>(MI, OpNum, O);
+}
+
+void SystemZInstPrinter::printU6ImmOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ printUImmOperand<6>(MI, OpNum, O);
+}
+
+void SystemZInstPrinter::printS8ImmOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ printSImmOperand<8>(MI, OpNum, O);
+}
+
+void SystemZInstPrinter::printU8ImmOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ printUImmOperand<8>(MI, OpNum, O);
+}
+
+void SystemZInstPrinter::printU12ImmOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ printUImmOperand<12>(MI, OpNum, O);
+}
+
+void SystemZInstPrinter::printS16ImmOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ printSImmOperand<16>(MI, OpNum, O);
+}
+
+void SystemZInstPrinter::printU16ImmOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ printUImmOperand<16>(MI, OpNum, O);
+}
+
+void SystemZInstPrinter::printS32ImmOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ printSImmOperand<32>(MI, OpNum, O);
+}
+
+void SystemZInstPrinter::printU32ImmOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ printUImmOperand<32>(MI, OpNum, O);
+}
+
+void SystemZInstPrinter::printU48ImmOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ printUImmOperand<48>(MI, OpNum, O);
+}
+
+void SystemZInstPrinter::printPCRelOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ if (MO.isImm()) {
+ O << "0x";
+ O.write_hex(MO.getImm());
+ } else
+ MO.getExpr()->print(O, &MAI);
+}
+
+void SystemZInstPrinter::printPCRelTLSOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ // Output the PC-relative operand.
+ printPCRelOperand(MI, OpNum, O);
+
+ // Output the TLS marker if present.
+ if ((unsigned)OpNum + 1 < MI->getNumOperands()) {
+ const MCOperand &MO = MI->getOperand(OpNum + 1);
+ const MCSymbolRefExpr &refExp = cast<MCSymbolRefExpr>(*MO.getExpr());
+ switch (refExp.getKind()) {
+ case MCSymbolRefExpr::VK_TLSGD:
+ O << ":tls_gdcall:";
+ break;
+ case MCSymbolRefExpr::VK_TLSLDM:
+ O << ":tls_ldcall:";
+ break;
+ default:
+ llvm_unreachable("Unexpected symbol kind");
+ }
+ O << refExp.getSymbol().getName();
+ }
+}
+
+void SystemZInstPrinter::printOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ printOperand(MI->getOperand(OpNum), &MAI, O);
+}
+
+void SystemZInstPrinter::printBDAddrOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ printAddress(MI->getOperand(OpNum).getReg(),
+ MI->getOperand(OpNum + 1).getImm(), 0, O);
+}
+
+void SystemZInstPrinter::printBDXAddrOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ printAddress(MI->getOperand(OpNum).getReg(),
+ MI->getOperand(OpNum + 1).getImm(),
+ MI->getOperand(OpNum + 2).getReg(), O);
+}
+
+void SystemZInstPrinter::printBDLAddrOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ unsigned Base = MI->getOperand(OpNum).getReg();
+ uint64_t Disp = MI->getOperand(OpNum + 1).getImm();
+ uint64_t Length = MI->getOperand(OpNum + 2).getImm();
+ O << Disp << '(' << Length;
+ if (Base)
+ O << ",%" << getRegisterName(Base);
+ O << ')';
+}
+
+void SystemZInstPrinter::printBDRAddrOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ unsigned Base = MI->getOperand(OpNum).getReg();
+ uint64_t Disp = MI->getOperand(OpNum + 1).getImm();
+ unsigned Length = MI->getOperand(OpNum + 2).getReg();
+ O << Disp << "(%" << getRegisterName(Length);
+ if (Base)
+ O << ",%" << getRegisterName(Base);
+ O << ')';
+}
+
+void SystemZInstPrinter::printBDVAddrOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ printAddress(MI->getOperand(OpNum).getReg(),
+ MI->getOperand(OpNum + 1).getImm(),
+ MI->getOperand(OpNum + 2).getReg(), O);
+}
+
+void SystemZInstPrinter::printCond4Operand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ static const char *const CondNames[] = {
+ "o", "h", "nle", "l", "nhe", "lh", "ne",
+ "e", "nlh", "he", "nl", "le", "nh", "no"
+ };
+ uint64_t Imm = MI->getOperand(OpNum).getImm();
+ assert(Imm > 0 && Imm < 15 && "Invalid condition");
+ O << CondNames[Imm - 1];
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
new file mode 100644
index 000000000000..d65c661545eb
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
@@ -0,0 +1,78 @@
+//==- SystemZInstPrinter.h - Convert SystemZ MCInst to assembly --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a SystemZ MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_INSTPRINTER_SYSTEMZINSTPRINTER_H
+#define LLVM_LIB_TARGET_SYSTEMZ_INSTPRINTER_SYSTEMZINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+#include <cstdint>
+
+namespace llvm {
+
+class MCOperand;
+
+class SystemZInstPrinter : public MCInstPrinter {
+public:
+ SystemZInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
+
+ // Automatically generated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+ // Print an address with the given base, displacement and index.
+ static void printAddress(unsigned Base, int64_t Disp, unsigned Index,
+ raw_ostream &O);
+
+ // Print the given operand.
+ static void printOperand(const MCOperand &MO, const MCAsmInfo *MAI,
+ raw_ostream &O);
+
+ // Override MCInstPrinter.
+ void printRegName(raw_ostream &O, unsigned RegNo) const override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
+
+private:
+ // Print various types of operand.
+ void printOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printBDAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printBDXAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printBDLAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printBDRAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printBDVAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printU1ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printU2ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printU3ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printU4ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printU6ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printS8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printU8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printU12ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printS16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printU16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printS32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printU32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printU48ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printPCRelOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printPCRelTLSOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+
+ // Print the mnemonic for a condition-code mask ("ne", "lh", etc.)
+ // This forms part of the instruction name rather than the operand list.
+ void printCond4Operand(const MCInst *MI, int OpNum, raw_ostream &O);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_SYSTEMZ_INSTPRINTER_SYSTEMZINSTPRINTER_H
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
new file mode 100644
index 000000000000..6b32a7926437
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -0,0 +1,129 @@
+//===-- SystemZMCAsmBackend.cpp - SystemZ assembler backend ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/SystemZMCFixups.h"
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCObjectWriter.h"
+
+using namespace llvm;
+
+// Value is a fully-resolved relocation value: Symbol + Addend [- Pivot].
+// Return the bits that should be installed in a relocation field for
+// fixup kind Kind.
+static uint64_t extractBitsForFixup(MCFixupKind Kind, uint64_t Value) {
+ if (Kind < FirstTargetFixupKind)
+ return Value;
+
+ switch (unsigned(Kind)) {
+ case SystemZ::FK_390_PC12DBL:
+ case SystemZ::FK_390_PC16DBL:
+ case SystemZ::FK_390_PC24DBL:
+ case SystemZ::FK_390_PC32DBL:
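+ // The *DBL relocation fields count halfwords, so halve the byte offset.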
+ return (int64_t)Value / 2;
+
+ case SystemZ::FK_390_TLS_CALL:
+ return 0;
+ }
+
+ llvm_unreachable("Unknown fixup kind!");
+}
+
+namespace {
+class SystemZMCAsmBackend : public MCAsmBackend {
+ uint8_t OSABI;
+public:
+ SystemZMCAsmBackend(uint8_t osABI)
+ : OSABI(osABI) {}
+
+ // Override MCAsmBackend
+ unsigned getNumFixupKinds() const override {
+ return SystemZ::NumTargetFixupKinds;
+ }
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
+ void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, MutableArrayRef<char> Data,
+ uint64_t Value, bool IsPCRel) const override;
+ bool mayNeedRelaxation(const MCInst &Inst) const override {
+ return false;
+ }
+ bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+ const MCRelaxableFragment *Fragment,
+ const MCAsmLayout &Layout) const override {
+ return false;
+ }
+ void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+ MCInst &Res) const override {
+ llvm_unreachable("SystemZ does do not have assembler relaxation");
+ }
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
+ return createSystemZObjectWriter(OS, OSABI);
+ }
+};
+} // end anonymous namespace
+
+const MCFixupKindInfo &
+SystemZMCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
+ const static MCFixupKindInfo Infos[SystemZ::NumTargetFixupKinds] = {
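+ // Each entry is { name, bit offset within the field, bit width, flags }.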
+ { "FK_390_PC12DBL", 4, 12, MCFixupKindInfo::FKF_IsPCRel },
+ { "FK_390_PC16DBL", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+ { "FK_390_PC24DBL", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
+ { "FK_390_PC32DBL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "FK_390_TLS_CALL", 0, 0, 0 }
+ };
+
+ if (Kind < FirstTargetFixupKind)
+ return MCAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
+}
+
+void SystemZMCAsmBackend::applyFixup(const MCAssembler &Asm,
+ const MCFixup &Fixup,
+ const MCValue &Target,
+ MutableArrayRef<char> Data, uint64_t Value,
+ bool IsPCRel) const {
+ MCFixupKind Kind = Fixup.getKind();
+ unsigned Offset = Fixup.getOffset();
+ unsigned BitSize = getFixupKindInfo(Kind).TargetSize;
+ unsigned Size = (BitSize + 7) / 8;
+
+ assert(Offset + Size <= Data.size() && "Invalid fixup offset!");
+
+ // Big-endian insertion of Size bytes.
+ Value = extractBitsForFixup(Kind, Value);
+ if (BitSize < 64)
+ Value &= ((uint64_t)1 << BitSize) - 1;
+ unsigned ShiftValue = (Size * 8) - 8;
+ for (unsigned I = 0; I != Size; ++I) {
+ Data[Offset + I] |= uint8_t(Value >> ShiftValue);
+ ShiftValue -= 8;
+ }
+}
+
+bool SystemZMCAsmBackend::writeNopData(uint64_t Count,
+ MCObjectWriter *OW) const {
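+ // Pad with 0x07 bytes; a 0x0707 halfword decodes as "bcr 0,%r7",
+ // a branch that is never taken.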
+ for (uint64_t I = 0; I != Count; ++I)
+ OW->write8(7);
+ return true;
+}
+
+MCAsmBackend *llvm::createSystemZMCAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options) {
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
+ return new SystemZMCAsmBackend(OSABI);
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
new file mode 100644
index 000000000000..6e00981939b6
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
@@ -0,0 +1,29 @@
+//===-- SystemZMCAsmInfo.cpp - SystemZ asm properties ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZMCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+
+using namespace llvm;
+
+SystemZMCAsmInfo::SystemZMCAsmInfo(const Triple &TT) {
+ CodePointerSize = 8;
+ CalleeSaveStackSlotSize = 8;
+ IsLittleEndian = false;
+
+ CommentString = "#";
+ ZeroDirective = "\t.space\t";
+ Data64bitsDirective = "\t.quad\t";
+ UsesELFSectionDirectiveForBSS = true;
+ SupportsDebugInformation = true;
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+
+ UseIntegratedAssembler = true;
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
new file mode 100644
index 000000000000..800f89232063
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
@@ -0,0 +1,26 @@
+//====-- SystemZMCAsmInfo.h - SystemZ asm properties -----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZMCASMINFO_H
+#define LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZMCASMINFO_H
+
+#include "llvm/MC/MCAsmInfoELF.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+class Triple;
+
+class SystemZMCAsmInfo : public MCAsmInfoELF {
+public:
+ explicit SystemZMCAsmInfo(const Triple &TT);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
new file mode 100644
index 000000000000..d188f56512ab
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
@@ -0,0 +1,307 @@
+//===-- SystemZMCCodeEmitter.cpp - Convert SystemZ code to machine code ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SystemZMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/SystemZMCFixups.h"
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "mccodeemitter"
+
+namespace {
+
+class SystemZMCCodeEmitter : public MCCodeEmitter {
+ const MCInstrInfo &MCII;
+ MCContext &Ctx;
+
+public:
+ SystemZMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
+ : MCII(mcii), Ctx(ctx) {
+ }
+
+ ~SystemZMCCodeEmitter() override = default;
+
+ // Override MCCodeEmitter.
+ void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
+
+private:
+ // Automatically generated by TableGen.
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ // Called by the TableGen code to get the binary encoding of operand
+ // MO in MI. Fixups is the list of fixups against MI.
+ uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ // Called by the TableGen code to get the binary encoding of an address.
+ // The index or length, if any, is encoded first, followed by the base,
+ // followed by the displacement. In a 20-bit displacement,
+ // the low 12 bits are encoded before the high 8 bits.
+ uint64_t getBDAddr12Encoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ uint64_t getBDAddr20Encoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ uint64_t getBDXAddr12Encoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ uint64_t getBDXAddr20Encoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ uint64_t getBDLAddr12Len4Encoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ uint64_t getBDLAddr12Len8Encoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ uint64_t getBDRAddr12Encoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ uint64_t getBDVAddr12Encoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ // Operand OpNum of MI needs a PC-relative fixup of kind Kind at
+ // Offset bytes from the start of MI. Add the fixup to Fixups
+ // and return the in-place addend, which since we're a RELA target
+ // is always 0. If AllowTLS is true and optional operand OpNum + 1
+ // is present, also emit a TLS call fixup for it.
+ uint64_t getPCRelEncoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ unsigned Kind, int64_t Offset,
+ bool AllowTLS) const;
+
+ uint64_t getPC16DBLEncoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ return getPCRelEncoding(MI, OpNum, Fixups,
+ SystemZ::FK_390_PC16DBL, 2, false);
+ }
+ uint64_t getPC32DBLEncoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ return getPCRelEncoding(MI, OpNum, Fixups,
+ SystemZ::FK_390_PC32DBL, 2, false);
+ }
+ uint64_t getPC16DBLTLSEncoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ return getPCRelEncoding(MI, OpNum, Fixups,
+ SystemZ::FK_390_PC16DBL, 2, true);
+ }
+ uint64_t getPC32DBLTLSEncoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ return getPCRelEncoding(MI, OpNum, Fixups,
+ SystemZ::FK_390_PC32DBL, 2, true);
+ }
+ uint64_t getPC12DBLBPPEncoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ return getPCRelEncoding(MI, OpNum, Fixups,
+ SystemZ::FK_390_PC12DBL, 1, false);
+ }
+ uint64_t getPC16DBLBPPEncoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ return getPCRelEncoding(MI, OpNum, Fixups,
+ SystemZ::FK_390_PC16DBL, 4, false);
+ }
+ uint64_t getPC24DBLBPPEncoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ return getPCRelEncoding(MI, OpNum, Fixups,
+ SystemZ::FK_390_PC24DBL, 3, false);
+ }
+
+private:
+ uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
+ void verifyInstructionPredicates(const MCInst &MI,
+ uint64_t AvailableFeatures) const;
+};
+
+} // end anonymous namespace
+
+void SystemZMCCodeEmitter::
+encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ verifyInstructionPredicates(MI,
+ computeAvailableFeatures(STI.getFeatureBits()));
+
+ uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
+ unsigned Size = MCII.get(MI.getOpcode()).getSize();
+ // Big-endian insertion of Size bytes.
+ unsigned ShiftValue = (Size * 8) - 8;
+ for (unsigned I = 0; I != Size; ++I) {
+ OS << uint8_t(Bits >> ShiftValue);
+ ShiftValue -= 8;
+ }
+}
+
+uint64_t SystemZMCCodeEmitter::
+getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ if (MO.isReg())
+ return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg());
+ if (MO.isImm())
+ return static_cast<uint64_t>(MO.getImm());
+ llvm_unreachable("Unexpected operand type!");
+}
+
+uint64_t SystemZMCCodeEmitter::
+getBDAddr12Encoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI);
+ uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups, STI);
+ assert(isUInt<4>(Base) && isUInt<12>(Disp));
+ return (Base << 12) | Disp;
+}
+
+uint64_t SystemZMCCodeEmitter::
+getBDAddr20Encoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI);
+ uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups, STI);
+ assert(isUInt<4>(Base) && isInt<20>(Disp));
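+ // Emit as B | DL | DH: the low 12 displacement bits precede the high 8.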
+ return (Base << 20) | ((Disp & 0xfff) << 8) | ((Disp & 0xff000) >> 12);
+}
+
+uint64_t SystemZMCCodeEmitter::
+getBDXAddr12Encoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI);
+ uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups, STI);
+ uint64_t Index = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups, STI);
+ assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<4>(Index));
+ return (Index << 16) | (Base << 12) | Disp;
+}
+
+uint64_t SystemZMCCodeEmitter::
+getBDXAddr20Encoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI);
+ uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups, STI);
+ uint64_t Index = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups, STI);
+ assert(isUInt<4>(Base) && isInt<20>(Disp) && isUInt<4>(Index));
+ return (Index << 24) | (Base << 20) | ((Disp & 0xfff) << 8)
+ | ((Disp & 0xff000) >> 12);
+}
+
+uint64_t SystemZMCCodeEmitter::
+getBDLAddr12Len4Encoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI);
+ uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups, STI);
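+ // Operands carry the true length; the encoding stores length minus one.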
+ uint64_t Len = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups, STI) - 1;
+ assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<4>(Len));
+ return (Len << 16) | (Base << 12) | Disp;
+}
+
+uint64_t SystemZMCCodeEmitter::
+getBDLAddr12Len8Encoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI);
+ uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups, STI);
+ uint64_t Len = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups, STI) - 1;
+ assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<8>(Len));
+ return (Len << 16) | (Base << 12) | Disp;
+}
+
+uint64_t SystemZMCCodeEmitter::
+getBDRAddr12Encoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI);
+ uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups, STI);
+ uint64_t Len = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups, STI);
+ assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<4>(Len));
+ return (Len << 16) | (Base << 12) | Disp;
+}
+
+uint64_t SystemZMCCodeEmitter::
+getBDVAddr12Encoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI);
+ uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups, STI);
+ uint64_t Index = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups, STI);
+ assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<5>(Index));
+ return (Index << 16) | (Base << 12) | Disp;
+}
+
+uint64_t
+SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ unsigned Kind, int64_t Offset,
+ bool AllowTLS) const {
+ const MCOperand &MO = MI.getOperand(OpNum);
+ const MCExpr *Expr;
+ if (MO.isImm())
+ Expr = MCConstantExpr::create(MO.getImm() + Offset, Ctx);
+ else {
+ Expr = MO.getExpr();
+ if (Offset) {
+ // The operand value is relative to the start of MI, but the fixup
+ // is relative to the operand field itself, which is Offset bytes
+ // into MI. Add Offset to the relocation value to cancel out
+ // this difference.
+ const MCExpr *OffsetExpr = MCConstantExpr::create(Offset, Ctx);
+ Expr = MCBinaryExpr::createAdd(Expr, OffsetExpr, Ctx);
+ }
+ }
+ Fixups.push_back(MCFixup::create(Offset, Expr, (MCFixupKind)Kind));
+
+ // Output the fixup for the TLS marker if present.
+ if (AllowTLS && OpNum + 1 < MI.getNumOperands()) {
+ const MCOperand &MOTLS = MI.getOperand(OpNum + 1);
+ Fixups.push_back(MCFixup::create(0, MOTLS.getExpr(),
+ (MCFixupKind)SystemZ::FK_390_TLS_CALL));
+ }
+ return 0;
+}
+
+#define ENABLE_INSTR_PREDICATE_VERIFIER
+#include "SystemZGenMCCodeEmitter.inc"
+
+MCCodeEmitter *llvm::createSystemZMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx) {
+ return new SystemZMCCodeEmitter(MCII, Ctx);
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
new file mode 100644
index 000000000000..c012accc14dd
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
@@ -0,0 +1,32 @@
+//===-- SystemZMCFixups.h - SystemZ-specific fixup entries ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZMCFIXUPS_H
+#define LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZMCFIXUPS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace SystemZ {
+enum FixupKind {
+ // These correspond directly to R_390_* relocations.
+ FK_390_PC12DBL = FirstTargetFixupKind,
+ FK_390_PC16DBL,
+ FK_390_PC24DBL,
+ FK_390_PC32DBL,
+ FK_390_TLS_CALL,
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+} // end namespace SystemZ
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
new file mode 100644
index 000000000000..df0a8161e6e7
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
@@ -0,0 +1,167 @@
+//===-- SystemZMCObjectWriter.cpp - SystemZ ELF writer --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/SystemZMCFixups.h"
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <cstdint>
+
+using namespace llvm;
+
+namespace {
+
+class SystemZObjectWriter : public MCELFObjectTargetWriter {
+public:
+ SystemZObjectWriter(uint8_t OSABI);
+ ~SystemZObjectWriter() override = default;
+
+protected:
+ // Override MCELFObjectTargetWriter.
+ unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
+ const MCFixup &Fixup, bool IsPCRel) const override;
+};
+
+} // end anonymous namespace
+
+SystemZObjectWriter::SystemZObjectWriter(uint8_t OSABI)
+ : MCELFObjectTargetWriter(/*Is64Bit=*/true, OSABI, ELF::EM_S390,
+ /*HasRelocationAddend=*/ true) {}
+
+// Return the relocation type for an absolute value of MCFixupKind Kind.
+static unsigned getAbsoluteReloc(unsigned Kind) {
+ switch (Kind) {
+ case FK_Data_1: return ELF::R_390_8;
+ case FK_Data_2: return ELF::R_390_16;
+ case FK_Data_4: return ELF::R_390_32;
+ case FK_Data_8: return ELF::R_390_64;
+ }
+ llvm_unreachable("Unsupported absolute address");
+}
+
+// Return the relocation type for a PC-relative value of MCFixupKind Kind.
+static unsigned getPCRelReloc(unsigned Kind) {
+ switch (Kind) {
+ case FK_Data_2: return ELF::R_390_PC16;
+ case FK_Data_4: return ELF::R_390_PC32;
+ case FK_Data_8: return ELF::R_390_PC64;
+ case SystemZ::FK_390_PC12DBL: return ELF::R_390_PC12DBL;
+ case SystemZ::FK_390_PC16DBL: return ELF::R_390_PC16DBL;
+ case SystemZ::FK_390_PC24DBL: return ELF::R_390_PC24DBL;
+ case SystemZ::FK_390_PC32DBL: return ELF::R_390_PC32DBL;
+ }
+ llvm_unreachable("Unsupported PC-relative address");
+}
+
+// Return the R_390_TLS_LE* relocation type for MCFixupKind Kind.
+static unsigned getTLSLEReloc(unsigned Kind) {
+ switch (Kind) {
+ case FK_Data_4: return ELF::R_390_TLS_LE32;
+ case FK_Data_8: return ELF::R_390_TLS_LE64;
+ }
+ llvm_unreachable("Unsupported absolute address");
+}
+
+// Return the R_390_TLS_LDO* relocation type for MCFixupKind Kind.
+static unsigned getTLSLDOReloc(unsigned Kind) {
+ switch (Kind) {
+ case FK_Data_4: return ELF::R_390_TLS_LDO32;
+ case FK_Data_8: return ELF::R_390_TLS_LDO64;
+ }
+ llvm_unreachable("Unsupported absolute address");
+}
+
+// Return the R_390_TLS_LDM* relocation type for MCFixupKind Kind.
+static unsigned getTLSLDMReloc(unsigned Kind) {
+ switch (Kind) {
+ case FK_Data_4: return ELF::R_390_TLS_LDM32;
+ case FK_Data_8: return ELF::R_390_TLS_LDM64;
+ case SystemZ::FK_390_TLS_CALL: return ELF::R_390_TLS_LDCALL;
+ }
+ llvm_unreachable("Unsupported absolute address");
+}
+
+// Return the R_390_TLS_GD* relocation type for MCFixupKind Kind.
+static unsigned getTLSGDReloc(unsigned Kind) {
+ switch (Kind) {
+ case FK_Data_4: return ELF::R_390_TLS_GD32;
+ case FK_Data_8: return ELF::R_390_TLS_GD64;
+ case SystemZ::FK_390_TLS_CALL: return ELF::R_390_TLS_GDCALL;
+ }
+ llvm_unreachable("Unsupported absolute address");
+}
+
+// Return the PLT relocation counterpart of MCFixupKind Kind.
+static unsigned getPLTReloc(unsigned Kind) {
+ switch (Kind) {
+ case SystemZ::FK_390_PC12DBL: return ELF::R_390_PLT12DBL;
+ case SystemZ::FK_390_PC16DBL: return ELF::R_390_PLT16DBL;
+ case SystemZ::FK_390_PC24DBL: return ELF::R_390_PLT24DBL;
+ case SystemZ::FK_390_PC32DBL: return ELF::R_390_PLT32DBL;
+ }
+ llvm_unreachable("Unsupported absolute address");
+}
+
+unsigned SystemZObjectWriter::getRelocType(MCContext &Ctx,
+ const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant();
+ unsigned Kind = Fixup.getKind();
+ switch (Modifier) {
+ case MCSymbolRefExpr::VK_None:
+ if (IsPCRel)
+ return getPCRelReloc(Kind);
+ return getAbsoluteReloc(Kind);
+
+ case MCSymbolRefExpr::VK_NTPOFF:
+ assert(!IsPCRel && "NTPOFF shouldn't be PC-relative");
+ return getTLSLEReloc(Kind);
+
+ case MCSymbolRefExpr::VK_INDNTPOFF:
+ if (IsPCRel && Kind == SystemZ::FK_390_PC32DBL)
+ return ELF::R_390_TLS_IEENT;
+ llvm_unreachable("Only PC-relative INDNTPOFF accesses are supported for now");
+
+ case MCSymbolRefExpr::VK_DTPOFF:
+ assert(!IsPCRel && "DTPOFF shouldn't be PC-relative");
+ return getTLSLDOReloc(Kind);
+
+ case MCSymbolRefExpr::VK_TLSLDM:
+ assert(!IsPCRel && "TLSLDM shouldn't be PC-relative");
+ return getTLSLDMReloc(Kind);
+
+ case MCSymbolRefExpr::VK_TLSGD:
+ assert(!IsPCRel && "TLSGD shouldn't be PC-relative");
+ return getTLSGDReloc(Kind);
+
+ case MCSymbolRefExpr::VK_GOT:
+ if (IsPCRel && Kind == SystemZ::FK_390_PC32DBL)
+ return ELF::R_390_GOTENT;
+ llvm_unreachable("Only PC-relative GOT accesses are supported for now");
+
+ case MCSymbolRefExpr::VK_PLT:
+ assert(IsPCRel && "@PLT shouldt be PC-relative");
+ return getPLTReloc(Kind);
+
+ default:
+ llvm_unreachable("Modifier not supported");
+ }
+}
+
+MCObjectWriter *llvm::createSystemZObjectWriter(raw_pwrite_stream &OS,
+ uint8_t OSABI) {
+ MCELFObjectTargetWriter *MOTW = new SystemZObjectWriter(OSABI);
+ return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/false);
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
new file mode 100644
index 000000000000..727ab921daf9
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -0,0 +1,253 @@
+//===-- SystemZMCTargetDesc.cpp - SystemZ target descriptions -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZMCTargetDesc.h"
+#include "InstPrinter/SystemZInstPrinter.h"
+#include "SystemZMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+#define GET_INSTRINFO_MC_DESC
+#include "SystemZGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "SystemZGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "SystemZGenRegisterInfo.inc"
+
+const unsigned SystemZMC::GR32Regs[16] = {
+ SystemZ::R0L, SystemZ::R1L, SystemZ::R2L, SystemZ::R3L,
+ SystemZ::R4L, SystemZ::R5L, SystemZ::R6L, SystemZ::R7L,
+ SystemZ::R8L, SystemZ::R9L, SystemZ::R10L, SystemZ::R11L,
+ SystemZ::R12L, SystemZ::R13L, SystemZ::R14L, SystemZ::R15L
+};
+
+const unsigned SystemZMC::GRH32Regs[16] = {
+ SystemZ::R0H, SystemZ::R1H, SystemZ::R2H, SystemZ::R3H,
+ SystemZ::R4H, SystemZ::R5H, SystemZ::R6H, SystemZ::R7H,
+ SystemZ::R8H, SystemZ::R9H, SystemZ::R10H, SystemZ::R11H,
+ SystemZ::R12H, SystemZ::R13H, SystemZ::R14H, SystemZ::R15H
+};
+
+const unsigned SystemZMC::GR64Regs[16] = {
+ SystemZ::R0D, SystemZ::R1D, SystemZ::R2D, SystemZ::R3D,
+ SystemZ::R4D, SystemZ::R5D, SystemZ::R6D, SystemZ::R7D,
+ SystemZ::R8D, SystemZ::R9D, SystemZ::R10D, SystemZ::R11D,
+ SystemZ::R12D, SystemZ::R13D, SystemZ::R14D, SystemZ::R15D
+};
+
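+// Only even-numbered GPRs can name a 128-bit pair, so the odd slots are 0.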
+const unsigned SystemZMC::GR128Regs[16] = {
+ SystemZ::R0Q, 0, SystemZ::R2Q, 0,
+ SystemZ::R4Q, 0, SystemZ::R6Q, 0,
+ SystemZ::R8Q, 0, SystemZ::R10Q, 0,
+ SystemZ::R12Q, 0, SystemZ::R14Q, 0
+};
+
+const unsigned SystemZMC::FP32Regs[16] = {
+ SystemZ::F0S, SystemZ::F1S, SystemZ::F2S, SystemZ::F3S,
+ SystemZ::F4S, SystemZ::F5S, SystemZ::F6S, SystemZ::F7S,
+ SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S,
+ SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S
+};
+
+const unsigned SystemZMC::FP64Regs[16] = {
+ SystemZ::F0D, SystemZ::F1D, SystemZ::F2D, SystemZ::F3D,
+ SystemZ::F4D, SystemZ::F5D, SystemZ::F6D, SystemZ::F7D,
+ SystemZ::F8D, SystemZ::F9D, SystemZ::F10D, SystemZ::F11D,
+ SystemZ::F12D, SystemZ::F13D, SystemZ::F14D, SystemZ::F15D
+};
+
+const unsigned SystemZMC::FP128Regs[16] = {
+ SystemZ::F0Q, SystemZ::F1Q, 0, 0,
+ SystemZ::F4Q, SystemZ::F5Q, 0, 0,
+ SystemZ::F8Q, SystemZ::F9Q, 0, 0,
+ SystemZ::F12Q, SystemZ::F13Q, 0, 0
+};
+
+const unsigned SystemZMC::VR32Regs[32] = {
+ SystemZ::F0S, SystemZ::F1S, SystemZ::F2S, SystemZ::F3S,
+ SystemZ::F4S, SystemZ::F5S, SystemZ::F6S, SystemZ::F7S,
+ SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S,
+ SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S,
+ SystemZ::F16S, SystemZ::F17S, SystemZ::F18S, SystemZ::F19S,
+ SystemZ::F20S, SystemZ::F21S, SystemZ::F22S, SystemZ::F23S,
+ SystemZ::F24S, SystemZ::F25S, SystemZ::F26S, SystemZ::F27S,
+ SystemZ::F28S, SystemZ::F29S, SystemZ::F30S, SystemZ::F31S
+};
+
+const unsigned SystemZMC::VR64Regs[32] = {
+ SystemZ::F0D, SystemZ::F1D, SystemZ::F2D, SystemZ::F3D,
+ SystemZ::F4D, SystemZ::F5D, SystemZ::F6D, SystemZ::F7D,
+ SystemZ::F8D, SystemZ::F9D, SystemZ::F10D, SystemZ::F11D,
+ SystemZ::F12D, SystemZ::F13D, SystemZ::F14D, SystemZ::F15D,
+ SystemZ::F16D, SystemZ::F17D, SystemZ::F18D, SystemZ::F19D,
+ SystemZ::F20D, SystemZ::F21D, SystemZ::F22D, SystemZ::F23D,
+ SystemZ::F24D, SystemZ::F25D, SystemZ::F26D, SystemZ::F27D,
+ SystemZ::F28D, SystemZ::F29D, SystemZ::F30D, SystemZ::F31D
+};
+
+const unsigned SystemZMC::VR128Regs[32] = {
+ SystemZ::V0, SystemZ::V1, SystemZ::V2, SystemZ::V3,
+ SystemZ::V4, SystemZ::V5, SystemZ::V6, SystemZ::V7,
+ SystemZ::V8, SystemZ::V9, SystemZ::V10, SystemZ::V11,
+ SystemZ::V12, SystemZ::V13, SystemZ::V14, SystemZ::V15,
+ SystemZ::V16, SystemZ::V17, SystemZ::V18, SystemZ::V19,
+ SystemZ::V20, SystemZ::V21, SystemZ::V22, SystemZ::V23,
+ SystemZ::V24, SystemZ::V25, SystemZ::V26, SystemZ::V27,
+ SystemZ::V28, SystemZ::V29, SystemZ::V30, SystemZ::V31
+};
+
+const unsigned SystemZMC::AR32Regs[16] = {
+ SystemZ::A0, SystemZ::A1, SystemZ::A2, SystemZ::A3,
+ SystemZ::A4, SystemZ::A5, SystemZ::A6, SystemZ::A7,
+ SystemZ::A8, SystemZ::A9, SystemZ::A10, SystemZ::A11,
+ SystemZ::A12, SystemZ::A13, SystemZ::A14, SystemZ::A15
+};
+
+const unsigned SystemZMC::CR64Regs[16] = {
+ SystemZ::C0, SystemZ::C1, SystemZ::C2, SystemZ::C3,
+ SystemZ::C4, SystemZ::C5, SystemZ::C6, SystemZ::C7,
+ SystemZ::C8, SystemZ::C9, SystemZ::C10, SystemZ::C11,
+ SystemZ::C12, SystemZ::C13, SystemZ::C14, SystemZ::C15
+};
+
+unsigned SystemZMC::getFirstReg(unsigned Reg) {
+ static unsigned Map[SystemZ::NUM_TARGET_REGS];
+ static bool Initialized = false;
+ if (!Initialized) {
+ for (unsigned I = 0; I < 16; ++I) {
+ Map[GR32Regs[I]] = I;
+ Map[GRH32Regs[I]] = I;
+ Map[GR64Regs[I]] = I;
+ Map[GR128Regs[I]] = I;
+ Map[FP128Regs[I]] = I;
+ Map[AR32Regs[I]] = I;
+ }
+ for (unsigned I = 0; I < 32; ++I) {
+ Map[VR32Regs[I]] = I;
+ Map[VR64Regs[I]] = I;
+ Map[VR128Regs[I]] = I;
+ }
+ Initialized = true;
+ }
+ assert(Reg < SystemZ::NUM_TARGET_REGS);
+ return Map[Reg];
+}
+
+static MCAsmInfo *createSystemZMCAsmInfo(const MCRegisterInfo &MRI,
+ const Triple &TT) {
+ MCAsmInfo *MAI = new SystemZMCAsmInfo(TT);
+ MCCFIInstruction Inst =
+ MCCFIInstruction::createDefCfa(nullptr,
+ MRI.getDwarfRegNum(SystemZ::R15D, true),
+ SystemZMC::CFAOffsetFromInitialSP);
+ MAI->addInitialFrameState(Inst);
+ return MAI;
+}
+
+static MCInstrInfo *createSystemZMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitSystemZMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createSystemZMCRegisterInfo(const Triple &TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitSystemZMCRegisterInfo(X, SystemZ::R14D);
+ return X;
+}
+
+static MCSubtargetInfo *
+createSystemZMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
+ return createSystemZMCSubtargetInfoImpl(TT, CPU, FS);
+}
+
+static void adjustCodeGenOpts(const Triple &TT, Reloc::Model RM,
+ CodeModel::Model &CM) {
+ // For SystemZ we define the models as follows:
+ //
+ // Small: BRASL can call any function and will use a stub if necessary.
+ // Locally-binding symbols will always be in range of LARL.
+ //
+ // Medium: BRASL can call any function and will use a stub if necessary.
+ // GOT slots and locally-defined text will always be in range
+ // of LARL, but other symbols might not be.
+ //
+ // Large: Equivalent to Medium for now.
+ //
+ // Kernel: Equivalent to Medium for now.
+ //
+ // This means that any PIC module smaller than 4GB meets the
+ // requirements of Small, so Small seems like the best default there.
+ //
+ // All symbols bind locally in a non-PIC module, so the choice is less
+ // obvious. There are two cases:
+ //
+ // - When creating an executable, PLTs and copy relocations allow
+ // us to treat external symbols as part of the executable.
+ // Any executable smaller than 4GB meets the requirements of Small,
+ // so that seems like the best default.
+ //
+ // - When creating JIT code, stubs will be in range of BRASL if the
+ // image is less than 4GB in size. GOT entries will likewise be
+ // in range of LARL. However, the JIT environment has no equivalent
+ // of copy relocs, so locally-binding data symbols might not be in
+ // the range of LARL. We need the Medium model in that case.
+ if (CM == CodeModel::Default)
+ CM = CodeModel::Small;
+ else if (CM == CodeModel::JITDefault)
+ CM = RM == Reloc::PIC_ ? CodeModel::Small : CodeModel::Medium;
+}
+
+static MCInstPrinter *createSystemZMCInstPrinter(const Triple &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI) {
+ return new SystemZInstPrinter(MAI, MII, MRI);
+}
+
+extern "C" void LLVMInitializeSystemZTargetMC() {
+ // Register the MCAsmInfo.
+ TargetRegistry::RegisterMCAsmInfo(getTheSystemZTarget(),
+ createSystemZMCAsmInfo);
+
+ // Register the adjustCodeGenOpts hook.
+ TargetRegistry::registerMCAdjustCodeGenOpts(getTheSystemZTarget(),
+ adjustCodeGenOpts);
+
+ // Register the MCCodeEmitter.
+ TargetRegistry::RegisterMCCodeEmitter(getTheSystemZTarget(),
+ createSystemZMCCodeEmitter);
+
+ // Register the MCInstrInfo.
+ TargetRegistry::RegisterMCInstrInfo(getTheSystemZTarget(),
+ createSystemZMCInstrInfo);
+
+ // Register the MCRegisterInfo.
+ TargetRegistry::RegisterMCRegInfo(getTheSystemZTarget(),
+ createSystemZMCRegisterInfo);
+
+ // Register the MCSubtargetInfo.
+ TargetRegistry::RegisterMCSubtargetInfo(getTheSystemZTarget(),
+ createSystemZMCSubtargetInfo);
+
+ // Register the MCAsmBackend.
+ TargetRegistry::RegisterMCAsmBackend(getTheSystemZTarget(),
+ createSystemZMCAsmBackend);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(getTheSystemZTarget(),
+ createSystemZMCInstPrinter);
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
new file mode 100644
index 000000000000..dbca3485290a
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
@@ -0,0 +1,109 @@
+//===-- SystemZMCTargetDesc.h - SystemZ target descriptions -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZMCTARGETDESC_H
+#define LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZMCTARGETDESC_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCObjectWriter;
+class MCRegisterInfo;
+class MCSubtargetInfo;
+class MCTargetOptions;
+class StringRef;
+class Target;
+class Triple;
+class raw_pwrite_stream;
+class raw_ostream;
+
+Target &getTheSystemZTarget();
+
+namespace SystemZMC {
+// How many bytes are in the ABI-defined, caller-allocated part of
+// a stack frame.
+const int64_t CallFrameSize = 160;
+
+// The offset of the DWARF CFA from the incoming stack pointer.
+const int64_t CFAOffsetFromInitialSP = CallFrameSize;
+
+// Maps of asm register numbers to LLVM register numbers, with 0 indicating
+// an invalid register. In principle we could use 32-bit and 64-bit register
+// classes directly, provided that we relegated the GPR allocation order
+// in SystemZRegisterInfo.td to an AltOrder and left the default order
+// as %r0-%r15. It seems better to provide the same interface for
+// all classes though.
+extern const unsigned GR32Regs[16];
+extern const unsigned GRH32Regs[16];
+extern const unsigned GR64Regs[16];
+extern const unsigned GR128Regs[16];
+extern const unsigned FP32Regs[16];
+extern const unsigned FP64Regs[16];
+extern const unsigned FP128Regs[16];
+extern const unsigned VR32Regs[32];
+extern const unsigned VR64Regs[32];
+extern const unsigned VR128Regs[32];
+extern const unsigned AR32Regs[16];
+extern const unsigned CR64Regs[16];
+
+// Return the 0-based number of the first architectural register that
+// contains the given LLVM register. E.g. R1D -> 1.
+unsigned getFirstReg(unsigned Reg);
+
+// Return the given register as a GR64.
+inline unsigned getRegAsGR64(unsigned Reg) {
+ return GR64Regs[getFirstReg(Reg)];
+}
+
+// Return the given register as a low GR32.
+inline unsigned getRegAsGR32(unsigned Reg) {
+ return GR32Regs[getFirstReg(Reg)];
+}
+
+// Return the given register as a high GR32.
+inline unsigned getRegAsGRH32(unsigned Reg) {
+ return GRH32Regs[getFirstReg(Reg)];
+}
+
+// Return the given register as a VR128.
+inline unsigned getRegAsVR128(unsigned Reg) {
+ return VR128Regs[getFirstReg(Reg)];
+}
+} // end namespace SystemZMC
+
+MCCodeEmitter *createSystemZMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx);
+
+MCAsmBackend *createSystemZMCAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options);
+
+MCObjectWriter *createSystemZObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI);
+} // end namespace llvm
+
+// Defines symbolic names for SystemZ registers.
+// This defines a mapping from register name to register number.
+#define GET_REGINFO_ENUM
+#include "SystemZGenRegisterInfo.inc"
+
+// Defines symbolic names for the SystemZ instructions.
+#define GET_INSTRINFO_ENUM
+#include "SystemZGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "SystemZGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/README.txt b/contrib/llvm/lib/Target/SystemZ/README.txt
new file mode 100644
index 000000000000..9b714157550d
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/README.txt
@@ -0,0 +1,159 @@
+//===---------------------------------------------------------------------===//
+// Random notes about and ideas for the SystemZ backend.
+//===---------------------------------------------------------------------===//
+
+The initial backend is deliberately restricted to z10. We should add support
+for later architectures at some point.
+
+--
+
+If an inline asm ties an i32 "r" result to an i64 input, the input
+will be treated as an i32, leaving the upper bits uninitialised.
+For example:
+
+define void @f4(i32 *%dst) {
+ %val = call i32 asm "blah $0", "=r,0" (i64 103)
+ store i32 %val, i32 *%dst
+ ret void
+}
+
+from CodeGen/SystemZ/asm-09.ll will use LHI rather than LGHI
+to load 103. This seems to be a general target-independent problem.
+
+--
+
+The tuning of the choice between LOAD ADDRESS (LA) and addition in
+SystemZISelDAGToDAG.cpp is suspect. It should be tweaked based on
+performance measurements.
+
+--
+
+There is no scheduling support.
+
+--
+
+We don't use the BRANCH ON INDEX instructions.
+
+--
+
+We only use MVC, XC and CLC for constant-length block operations.
+We could extend them to variable-length operations too,
+using EXECUTE RELATIVE LONG.
+
+MVCIN, MVCLE and CLCLE may be worthwhile too.
+
+--
+
+We don't use CUSE or the TRANSLATE family of instructions for string
+operations. The TRANSLATE ones are probably more difficult to exploit.
+
+--
+
+We don't take full advantage of builtins like fabsl because the calling
+conventions require f128s to be returned by invisible reference.
+
+--
+
+ADD LOGICAL WITH SIGNED IMMEDIATE could be useful when we need to
+produce a carry. SUBTRACT LOGICAL IMMEDIATE could be useful when we
+need to produce a borrow. (Note that there are no memory forms of
+ADD LOGICAL WITH CARRY and SUBTRACT LOGICAL WITH BORROW, so the high
+part of 128-bit memory operations would probably need to be done
+via a register.)
+
+--
+
+We don't use ICM, STCM, or CLM.
+
+--
+
+We don't use ADD (LOGICAL) HIGH, SUBTRACT (LOGICAL) HIGH,
+or COMPARE (LOGICAL) HIGH yet.
+
+--
+
+DAGCombiner doesn't yet fold truncations of extended loads. Functions like:
+
+ unsigned long f (unsigned long x, unsigned short *y)
+ {
+ return (x << 32) | *y;
+ }
+
+therefore end up as:
+
+ sllg %r2, %r2, 32
+ llgh %r0, 0(%r3)
+ lr %r2, %r0
+ br %r14
+
+but truncating the load would give:
+
+ sllg %r2, %r2, 32
+ lh %r2, 0(%r3)
+ br %r14
+
+--
+
+Functions like:
+
+define i64 @f1(i64 %a) {
+ %and = and i64 %a, 1
+ ret i64 %and
+}
+
+ought to be implemented as:
+
+ lhi %r0, 1
+ ngr %r2, %r0
+ br %r14
+
+but two-address optimizations reverse the order of the AND and force:
+
+ lhi %r0, 1
+ ngr %r0, %r2
+ lgr %r2, %r0
+ br %r14
+
+CodeGen/SystemZ/and-04.ll has several examples of this.
+
+--
+
+Out-of-range displacements are usually handled by loading the full
+address into a register. In many cases it would be better to create
+an anchor point instead. E.g. for:
+
+define void @f4a(i128 *%aptr, i64 %base) {
+ %addr = add i64 %base, 524288
+ %bptr = inttoptr i64 %addr to i128 *
+ %a = load volatile i128 *%aptr
+ %b = load i128 *%bptr
+ %add = add i128 %a, %b
+ store i128 %add, i128 *%aptr
+ ret void
+}
+
+(from CodeGen/SystemZ/int-add-08.ll) we load %base+524288 and %base+524296
+into separate registers, rather than using %base+524288 as a base for both.
+
+--
+
+Dynamic stack allocations round the size up to a multiple of 8 bytes and
+then allocate that rounded amount. It would be simpler to subtract the
+unrounded size from the copy of the stack pointer and then align the result.
+See CodeGen/SystemZ/alloca-01.ll for an example.
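
A minimal sketch of the suggested scheme (hypothetical names, not part of this
change):

  #include <cstdint>

  // Subtract the unrounded size first, then align the result down,
  // saving the separate round-up of the allocation size.
  static uint64_t allocateSimple(uint64_t SP, uint64_t Size) {
    SP -= Size;
    return SP & ~uint64_t(7); // align down to an 8-byte boundary
  }
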
+
+--
+
+If needed, we can support 16-byte atomics using LPQ, STPQ and CSDG.
+
+--
+
+We might want to model all access registers and use them to spill
+32-bit values.
+
+--
+
+We might want to use the 'overflow' condition of e.g. AR to support
+llvm.sadd.with.overflow.i32 and related intrinsics - the generated code
+for a signed overflow check is currently quite bad. This would improve
+the results of using -ftrapv.
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZ.h b/contrib/llvm/lib/Target/SystemZ/SystemZ.h
new file mode 100644
index 000000000000..9a8e508e4119
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZ.h
@@ -0,0 +1,185 @@
+//==- SystemZ.h - Top-Level Interface for SystemZ representation -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in
+// the LLVM SystemZ backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZ_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZ_H
+
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "llvm/Support/CodeGen.h"
+
+namespace llvm {
+class SystemZTargetMachine;
+class FunctionPass;
+
+namespace SystemZ {
+// Condition-code mask values.
+const unsigned CCMASK_0 = 1 << 3;
+const unsigned CCMASK_1 = 1 << 2;
+const unsigned CCMASK_2 = 1 << 1;
+const unsigned CCMASK_3 = 1 << 0;
+const unsigned CCMASK_ANY = CCMASK_0 | CCMASK_1 | CCMASK_2 | CCMASK_3;
+
+// Condition-code mask assignments for integer and floating-point
+// comparisons.
+const unsigned CCMASK_CMP_EQ = CCMASK_0;
+const unsigned CCMASK_CMP_LT = CCMASK_1;
+const unsigned CCMASK_CMP_GT = CCMASK_2;
+const unsigned CCMASK_CMP_NE = CCMASK_CMP_LT | CCMASK_CMP_GT;
+const unsigned CCMASK_CMP_LE = CCMASK_CMP_EQ | CCMASK_CMP_LT;
+const unsigned CCMASK_CMP_GE = CCMASK_CMP_EQ | CCMASK_CMP_GT;
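
To make the encoding concrete (a sketch, not part of the change itself):
CCMASK_0 through CCMASK_3 are the one-hot values 8, 4, 2 and 1, so composite
masks are plain bitwise ORs:

  // "Less than or equal" accepts CC 0 (equal) or CC 1 (less than):
  // (1 << 3) | (1 << 2) == 12, i.e. binary 1100.
  static_assert(SystemZ::CCMASK_CMP_LE == 12, "CC0|CC1");
  static_assert(SystemZ::CCMASK_CMP_NE == 6, "CC1|CC2");
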
+
+// Condition-code mask assignments for floating-point comparisons only.
+const unsigned CCMASK_CMP_UO = CCMASK_3;
+const unsigned CCMASK_CMP_O = CCMASK_ANY ^ CCMASK_CMP_UO;
+
+// All condition-code values produced by comparisons.
+const unsigned CCMASK_ICMP = CCMASK_0 | CCMASK_1 | CCMASK_2;
+const unsigned CCMASK_FCMP = CCMASK_0 | CCMASK_1 | CCMASK_2 | CCMASK_3;
+
+// Condition-code mask assignments for CS.
+const unsigned CCMASK_CS_EQ = CCMASK_0;
+const unsigned CCMASK_CS_NE = CCMASK_1;
+const unsigned CCMASK_CS = CCMASK_0 | CCMASK_1;
+
+// Condition-code mask assignments for a completed SRST loop.
+const unsigned CCMASK_SRST_FOUND = CCMASK_1;
+const unsigned CCMASK_SRST_NOTFOUND = CCMASK_2;
+const unsigned CCMASK_SRST = CCMASK_1 | CCMASK_2;
+
+// Condition-code mask assignments for TEST UNDER MASK.
+const unsigned CCMASK_TM_ALL_0 = CCMASK_0;
+const unsigned CCMASK_TM_MIXED_MSB_0 = CCMASK_1;
+const unsigned CCMASK_TM_MIXED_MSB_1 = CCMASK_2;
+const unsigned CCMASK_TM_ALL_1 = CCMASK_3;
+const unsigned CCMASK_TM_SOME_0 = CCMASK_TM_ALL_1 ^ CCMASK_ANY;
+const unsigned CCMASK_TM_SOME_1 = CCMASK_TM_ALL_0 ^ CCMASK_ANY;
+const unsigned CCMASK_TM_MSB_0 = CCMASK_0 | CCMASK_1;
+const unsigned CCMASK_TM_MSB_1 = CCMASK_2 | CCMASK_3;
+const unsigned CCMASK_TM = CCMASK_ANY;
+
+// Condition-code mask assignments for TRANSACTION_BEGIN.
+const unsigned CCMASK_TBEGIN_STARTED = CCMASK_0;
+const unsigned CCMASK_TBEGIN_INDETERMINATE = CCMASK_1;
+const unsigned CCMASK_TBEGIN_TRANSIENT = CCMASK_2;
+const unsigned CCMASK_TBEGIN_PERSISTENT = CCMASK_3;
+const unsigned CCMASK_TBEGIN = CCMASK_ANY;
+
+// Condition-code mask assignments for TRANSACTION_END.
+const unsigned CCMASK_TEND_TX = CCMASK_0;
+const unsigned CCMASK_TEND_NOTX = CCMASK_2;
+const unsigned CCMASK_TEND = CCMASK_TEND_TX | CCMASK_TEND_NOTX;
+
+// Condition-code mask assignments for vector comparisons (and similar
+// operations).
+const unsigned CCMASK_VCMP_ALL = CCMASK_0;
+const unsigned CCMASK_VCMP_MIXED = CCMASK_1;
+const unsigned CCMASK_VCMP_NONE = CCMASK_3;
+const unsigned CCMASK_VCMP = CCMASK_0 | CCMASK_1 | CCMASK_3;
+
+// Condition-code mask assignments for Test Data Class.
+const unsigned CCMASK_TDC_NOMATCH = CCMASK_0;
+const unsigned CCMASK_TDC_MATCH = CCMASK_1;
+const unsigned CCMASK_TDC = CCMASK_TDC_NOMATCH | CCMASK_TDC_MATCH;
+
+// The position of the low CC bit in an IPM result.
+const unsigned IPM_CC = 28;
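
A sketch of how a condition code is recovered from an INSERT PROGRAM MASK
result (the variable name is hypothetical):

  // IPM leaves the CC in bits 28-29 of the 32-bit result, so a shift
  // right by IPM_CC brings it down to the range 0-3.
  unsigned CC = (IPMResult >> SystemZ::IPM_CC) & 3;
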
+
+// Mask assignments for PFD.
+const unsigned PFD_READ = 1;
+const unsigned PFD_WRITE = 2;
+
+// Mask assignments for TDC
+const unsigned TDCMASK_ZERO_PLUS = 0x800;
+const unsigned TDCMASK_ZERO_MINUS = 0x400;
+const unsigned TDCMASK_NORMAL_PLUS = 0x200;
+const unsigned TDCMASK_NORMAL_MINUS = 0x100;
+const unsigned TDCMASK_SUBNORMAL_PLUS = 0x080;
+const unsigned TDCMASK_SUBNORMAL_MINUS = 0x040;
+const unsigned TDCMASK_INFINITY_PLUS = 0x020;
+const unsigned TDCMASK_INFINITY_MINUS = 0x010;
+const unsigned TDCMASK_QNAN_PLUS = 0x008;
+const unsigned TDCMASK_QNAN_MINUS = 0x004;
+const unsigned TDCMASK_SNAN_PLUS = 0x002;
+const unsigned TDCMASK_SNAN_MINUS = 0x001;
+
+const unsigned TDCMASK_ZERO = TDCMASK_ZERO_PLUS | TDCMASK_ZERO_MINUS;
+const unsigned TDCMASK_POSITIVE = TDCMASK_NORMAL_PLUS |
+ TDCMASK_SUBNORMAL_PLUS |
+ TDCMASK_INFINITY_PLUS;
+const unsigned TDCMASK_NEGATIVE = TDCMASK_NORMAL_MINUS |
+ TDCMASK_SUBNORMAL_MINUS |
+ TDCMASK_INFINITY_MINUS;
+const unsigned TDCMASK_NAN = TDCMASK_QNAN_PLUS |
+ TDCMASK_QNAN_MINUS |
+ TDCMASK_SNAN_PLUS |
+ TDCMASK_SNAN_MINUS;
+const unsigned TDCMASK_PLUS = TDCMASK_POSITIVE |
+ TDCMASK_ZERO_PLUS |
+ TDCMASK_QNAN_PLUS |
+ TDCMASK_SNAN_PLUS;
+const unsigned TDCMASK_MINUS = TDCMASK_NEGATIVE |
+ TDCMASK_ZERO_MINUS |
+ TDCMASK_QNAN_MINUS |
+ TDCMASK_SNAN_MINUS;
+const unsigned TDCMASK_ALL = TDCMASK_PLUS | TDCMASK_MINUS;
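
The composite TDC masks are again plain ORs of the per-class bits; for
example:

  // Any NaN, either sign: 0x008 | 0x004 | 0x002 | 0x001 == 0x00f.
  static_assert(SystemZ::TDCMASK_NAN == 0x00f, "QNaN/SNaN, both signs");
  // Zero of either sign: 0x800 | 0x400 == 0xc00.
  static_assert(SystemZ::TDCMASK_ZERO == 0xc00, "+0 and -0");
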
+
+// Number of bits in a vector register.
+const unsigned VectorBits = 128;
+
+// Number of bytes in a vector register (and consequently the number of
+// bytes in a general permute vector).
+const unsigned VectorBytes = VectorBits / 8;
+
+// Return true if Val fits an LLILL operand.
+static inline bool isImmLL(uint64_t Val) {
+ return (Val & ~0x000000000000ffffULL) == 0;
+}
+
+// Return true if Val fits an LLILH operand.
+static inline bool isImmLH(uint64_t Val) {
+ return (Val & ~0x00000000ffff0000ULL) == 0;
+}
+
+// Return true if Val fits an LLIHL operand.
+static inline bool isImmHL(uint64_t Val) {
+ return (Val & ~0x0000ffff00000000ULL) == 0;
+}
+
+// Return true if Val fits an LLIHH operand.
+static inline bool isImmHH(uint64_t Val) {
+ return (Val & ~0xffff000000000000ULL) == 0;
+}
+
+// Return true if Val fits an LLILF operand.
+static inline bool isImmLF(uint64_t Val) {
+ return (Val & ~0x00000000ffffffffULL) == 0;
+}
+
+// Return true if Val fits an LLIHF operand.
+static inline bool isImmHF(uint64_t Val) {
+ return (Val & ~0xffffffff00000000ULL) == 0;
+}
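
Some illustrative inputs for these predicates (arbitrary test values, not
taken from the source):

  #include <cassert>

  void checkImmediateClasses() {
    assert(SystemZ::isImmLL(0x0000000000001234ULL));  // low 16 bits only
    assert(SystemZ::isImmLH(0x0000000012340000ULL));  // bits 16-31 only
    assert(SystemZ::isImmHL(0x0000123400000000ULL));  // bits 32-47 only
    assert(SystemZ::isImmHH(0x1234000000000000ULL));  // bits 48-63 only
    assert(!SystemZ::isImmLF(0x0000000100000000ULL)); // bit 32 set: too wide
  }
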
+} // end namespace SystemZ
+
+FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM);
+FunctionPass *createSystemZExpandPseudoPass(SystemZTargetMachine &TM);
+FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM);
+FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM);
+FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM);
+FunctionPass *createSystemZTDCPass();
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZ.td b/contrib/llvm/lib/Target/SystemZ/SystemZ.td
new file mode 100644
index 000000000000..41300a1b6295
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZ.td
@@ -0,0 +1,78 @@
+//===-- SystemZ.td - Describe the SystemZ target machine -----*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// SystemZ subtarget features
+//===----------------------------------------------------------------------===//
+
+include "SystemZFeatures.td"
+
+//===----------------------------------------------------------------------===//
+// SystemZ subtarget scheduling models
+//===----------------------------------------------------------------------===//
+
+include "SystemZSchedule.td"
+
+//===----------------------------------------------------------------------===//
+// SystemZ supported processors
+//===----------------------------------------------------------------------===//
+
+include "SystemZProcessors.td"
+
+//===----------------------------------------------------------------------===//
+// Register file description
+//===----------------------------------------------------------------------===//
+
+include "SystemZRegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Calling convention description
+//===----------------------------------------------------------------------===//
+
+include "SystemZCallingConv.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction descriptions
+//===----------------------------------------------------------------------===//
+
+include "SystemZOperators.td"
+include "SystemZOperands.td"
+include "SystemZPatterns.td"
+include "SystemZInstrFormats.td"
+include "SystemZInstrInfo.td"
+include "SystemZInstrVector.td"
+include "SystemZInstrFP.td"
+include "SystemZInstrHFP.td"
+include "SystemZInstrDFP.td"
+include "SystemZInstrSystem.td"
+
+def SystemZInstrInfo : InstrInfo {}
+
+//===----------------------------------------------------------------------===//
+// Assembly parser
+//===----------------------------------------------------------------------===//
+
+def SystemZAsmParser : AsmParser {
+ let ShouldEmitMatchRegisterName = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Top-level target declaration
+//===----------------------------------------------------------------------===//
+
+def SystemZ : Target {
+ let InstructionSet = SystemZInstrInfo;
+ let AssemblyParsers = [SystemZAsmParser];
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
new file mode 100644
index 000000000000..b39245b20b3c
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -0,0 +1,527 @@
+//===-- SystemZAsmPrinter.cpp - SystemZ LLVM assembly printer -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Streams SystemZ assembly language and associated data, in the form of
+// MCInsts and MCExprs respectively.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZAsmPrinter.h"
+#include "InstPrinter/SystemZInstPrinter.h"
+#include "SystemZConstantPoolValue.h"
+#include "SystemZMCInstLower.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+// Return an RI instruction like MI with opcode Opcode, but with the
+// GR64 register operands turned into GR32s.
+static MCInst lowerRILow(const MachineInstr *MI, unsigned Opcode) {
+ if (MI->isCompare())
+ return MCInstBuilder(Opcode)
+ .addReg(SystemZMC::getRegAsGR32(MI->getOperand(0).getReg()))
+ .addImm(MI->getOperand(1).getImm());
+ else
+ return MCInstBuilder(Opcode)
+ .addReg(SystemZMC::getRegAsGR32(MI->getOperand(0).getReg()))
+ .addReg(SystemZMC::getRegAsGR32(MI->getOperand(1).getReg()))
+ .addImm(MI->getOperand(2).getImm());
+}
+
+// Return an RI instruction like MI with opcode Opcode, but with the
+// GR64 register operands turned into GRH32s.
+static MCInst lowerRIHigh(const MachineInstr *MI, unsigned Opcode) {
+ if (MI->isCompare())
+ return MCInstBuilder(Opcode)
+ .addReg(SystemZMC::getRegAsGRH32(MI->getOperand(0).getReg()))
+ .addImm(MI->getOperand(1).getImm());
+ else
+ return MCInstBuilder(Opcode)
+ .addReg(SystemZMC::getRegAsGRH32(MI->getOperand(0).getReg()))
+ .addReg(SystemZMC::getRegAsGRH32(MI->getOperand(1).getReg()))
+ .addImm(MI->getOperand(2).getImm());
+}
+
+// Return an RI instruction like MI with opcode Opcode, but with the
+// R2 register turned into a GR64.
+static MCInst lowerRIEfLow(const MachineInstr *MI, unsigned Opcode) {
+ return MCInstBuilder(Opcode)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addReg(SystemZMC::getRegAsGR64(MI->getOperand(2).getReg()))
+ .addImm(MI->getOperand(3).getImm())
+ .addImm(MI->getOperand(4).getImm())
+ .addImm(MI->getOperand(5).getImm());
+}
+
+static const MCSymbolRefExpr *getTLSGetOffset(MCContext &Context) {
+ StringRef Name = "__tls_get_offset";
+ return MCSymbolRefExpr::create(Context.getOrCreateSymbol(Name),
+ MCSymbolRefExpr::VK_PLT,
+ Context);
+}
+
+static const MCSymbolRefExpr *getGlobalOffsetTable(MCContext &Context) {
+ StringRef Name = "_GLOBAL_OFFSET_TABLE_";
+ return MCSymbolRefExpr::create(Context.getOrCreateSymbol(Name),
+ MCSymbolRefExpr::VK_None,
+ Context);
+}
+
+// MI loads the high part of a vector from memory. Return an instruction
+// that uses the replicating vector load Opcode to do the same thing.
+static MCInst lowerSubvectorLoad(const MachineInstr *MI, unsigned Opcode) {
+ return MCInstBuilder(Opcode)
+ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg()))
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(MI->getOperand(3).getReg());
+}
+
+// MI stores the high part of a vector to memory. Return an instruction
+// that uses the elemental vector store Opcode to do the same thing.
+static MCInst lowerSubvectorStore(const MachineInstr *MI, unsigned Opcode) {
+ return MCInstBuilder(Opcode)
+ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg()))
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(MI->getOperand(3).getReg())
+ .addImm(0);
+}
+
+void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ SystemZMCInstLower Lower(MF->getContext(), *this);
+ MCInst LoweredMI;
+ switch (MI->getOpcode()) {
+ case SystemZ::Return:
+ LoweredMI = MCInstBuilder(SystemZ::BR).addReg(SystemZ::R14D);
+ break;
+
+ case SystemZ::CondReturn:
+ LoweredMI = MCInstBuilder(SystemZ::BCR)
+ .addImm(MI->getOperand(0).getImm())
+ .addImm(MI->getOperand(1).getImm())
+ .addReg(SystemZ::R14D);
+ break;
+
+ case SystemZ::CRBReturn:
+ LoweredMI = MCInstBuilder(SystemZ::CRB)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(SystemZ::R14D)
+ .addImm(0);
+ break;
+
+ case SystemZ::CGRBReturn:
+ LoweredMI = MCInstBuilder(SystemZ::CGRB)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(SystemZ::R14D)
+ .addImm(0);
+ break;
+
+ case SystemZ::CIBReturn:
+ LoweredMI = MCInstBuilder(SystemZ::CIB)
+ .addReg(MI->getOperand(0).getReg())
+ .addImm(MI->getOperand(1).getImm())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(SystemZ::R14D)
+ .addImm(0);
+ break;
+
+ case SystemZ::CGIBReturn:
+ LoweredMI = MCInstBuilder(SystemZ::CGIB)
+ .addReg(MI->getOperand(0).getReg())
+ .addImm(MI->getOperand(1).getImm())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(SystemZ::R14D)
+ .addImm(0);
+ break;
+
+ case SystemZ::CLRBReturn:
+ LoweredMI = MCInstBuilder(SystemZ::CLRB)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(SystemZ::R14D)
+ .addImm(0);
+ break;
+
+ case SystemZ::CLGRBReturn:
+ LoweredMI = MCInstBuilder(SystemZ::CLGRB)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(SystemZ::R14D)
+ .addImm(0);
+ break;
+
+ case SystemZ::CLIBReturn:
+ LoweredMI = MCInstBuilder(SystemZ::CLIB)
+ .addReg(MI->getOperand(0).getReg())
+ .addImm(MI->getOperand(1).getImm())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(SystemZ::R14D)
+ .addImm(0);
+ break;
+
+ case SystemZ::CLGIBReturn:
+ LoweredMI = MCInstBuilder(SystemZ::CLGIB)
+ .addReg(MI->getOperand(0).getReg())
+ .addImm(MI->getOperand(1).getImm())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(SystemZ::R14D)
+ .addImm(0);
+ break;
+
+ case SystemZ::CallBRASL:
+ LoweredMI = MCInstBuilder(SystemZ::BRASL)
+ .addReg(SystemZ::R14D)
+ .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_PLT));
+ break;
+
+ case SystemZ::CallBASR:
+ LoweredMI = MCInstBuilder(SystemZ::BASR)
+ .addReg(SystemZ::R14D)
+ .addReg(MI->getOperand(0).getReg());
+ break;
+
+ case SystemZ::CallJG:
+ LoweredMI = MCInstBuilder(SystemZ::JG)
+ .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_PLT));
+ break;
+
+ case SystemZ::CallBRCL:
+ LoweredMI = MCInstBuilder(SystemZ::BRCL)
+ .addImm(MI->getOperand(0).getImm())
+ .addImm(MI->getOperand(1).getImm())
+ .addExpr(Lower.getExpr(MI->getOperand(2), MCSymbolRefExpr::VK_PLT));
+ break;
+
+ case SystemZ::CallBR:
+ LoweredMI = MCInstBuilder(SystemZ::BR).addReg(SystemZ::R1D);
+ break;
+
+ case SystemZ::CallBCR:
+ LoweredMI = MCInstBuilder(SystemZ::BCR)
+ .addImm(MI->getOperand(0).getImm())
+ .addImm(MI->getOperand(1).getImm())
+ .addReg(SystemZ::R1D);
+ break;
+
+ case SystemZ::CRBCall:
+ LoweredMI = MCInstBuilder(SystemZ::CRB)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(SystemZ::R1D)
+ .addImm(0);
+ break;
+
+ case SystemZ::CGRBCall:
+ LoweredMI = MCInstBuilder(SystemZ::CGRB)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(SystemZ::R1D)
+ .addImm(0);
+ break;
+
+ case SystemZ::CIBCall:
+ LoweredMI = MCInstBuilder(SystemZ::CIB)
+ .addReg(MI->getOperand(0).getReg())
+ .addImm(MI->getOperand(1).getImm())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(SystemZ::R1D)
+ .addImm(0);
+ break;
+
+ case SystemZ::CGIBCall:
+ LoweredMI = MCInstBuilder(SystemZ::CGIB)
+ .addReg(MI->getOperand(0).getReg())
+ .addImm(MI->getOperand(1).getImm())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(SystemZ::R1D)
+ .addImm(0);
+ break;
+
+ case SystemZ::CLRBCall:
+ LoweredMI = MCInstBuilder(SystemZ::CLRB)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(SystemZ::R1D)
+ .addImm(0);
+ break;
+
+ case SystemZ::CLGRBCall:
+ LoweredMI = MCInstBuilder(SystemZ::CLGRB)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(SystemZ::R1D)
+ .addImm(0);
+ break;
+
+ case SystemZ::CLIBCall:
+ LoweredMI = MCInstBuilder(SystemZ::CLIB)
+ .addReg(MI->getOperand(0).getReg())
+ .addImm(MI->getOperand(1).getImm())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(SystemZ::R1D)
+ .addImm(0);
+ break;
+
+ case SystemZ::CLGIBCall:
+ LoweredMI = MCInstBuilder(SystemZ::CLGIB)
+ .addReg(MI->getOperand(0).getReg())
+ .addImm(MI->getOperand(1).getImm())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(SystemZ::R1D)
+ .addImm(0);
+ break;
+
+ case SystemZ::TLS_GDCALL:
+ LoweredMI = MCInstBuilder(SystemZ::BRASL)
+ .addReg(SystemZ::R14D)
+ .addExpr(getTLSGetOffset(MF->getContext()))
+ .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_TLSGD));
+ break;
+
+ case SystemZ::TLS_LDCALL:
+ LoweredMI = MCInstBuilder(SystemZ::BRASL)
+ .addReg(SystemZ::R14D)
+ .addExpr(getTLSGetOffset(MF->getContext()))
+ .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_TLSLDM));
+ break;
+
+ case SystemZ::GOT:
+ LoweredMI = MCInstBuilder(SystemZ::LARL)
+ .addReg(MI->getOperand(0).getReg())
+ .addExpr(getGlobalOffsetTable(MF->getContext()));
+ break;
+
+ case SystemZ::IILF64:
+ LoweredMI = MCInstBuilder(SystemZ::IILF)
+ .addReg(SystemZMC::getRegAsGR32(MI->getOperand(0).getReg()))
+ .addImm(MI->getOperand(2).getImm());
+ break;
+
+ case SystemZ::IIHF64:
+ LoweredMI = MCInstBuilder(SystemZ::IIHF)
+ .addReg(SystemZMC::getRegAsGRH32(MI->getOperand(0).getReg()))
+ .addImm(MI->getOperand(2).getImm());
+ break;
+
+ case SystemZ::RISBHH:
+ case SystemZ::RISBHL:
+ LoweredMI = lowerRIEfLow(MI, SystemZ::RISBHG);
+ break;
+
+ case SystemZ::RISBLH:
+ case SystemZ::RISBLL:
+ LoweredMI = lowerRIEfLow(MI, SystemZ::RISBLG);
+ break;
+
+ case SystemZ::VLVGP32:
+ LoweredMI = MCInstBuilder(SystemZ::VLVGP)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(SystemZMC::getRegAsGR64(MI->getOperand(1).getReg()))
+ .addReg(SystemZMC::getRegAsGR64(MI->getOperand(2).getReg()));
+ break;
+
+ case SystemZ::VLR32:
+ case SystemZ::VLR64:
+ LoweredMI = MCInstBuilder(SystemZ::VLR)
+ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg()))
+ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(1).getReg()));
+ break;
+
+ case SystemZ::VL32:
+ LoweredMI = lowerSubvectorLoad(MI, SystemZ::VLREPF);
+ break;
+
+ case SystemZ::VL64:
+ LoweredMI = lowerSubvectorLoad(MI, SystemZ::VLREPG);
+ break;
+
+ case SystemZ::VST32:
+ LoweredMI = lowerSubvectorStore(MI, SystemZ::VSTEF);
+ break;
+
+ case SystemZ::VST64:
+ LoweredMI = lowerSubvectorStore(MI, SystemZ::VSTEG);
+ break;
+
+ case SystemZ::LFER:
+ LoweredMI = MCInstBuilder(SystemZ::VLGVF)
+ .addReg(SystemZMC::getRegAsGR64(MI->getOperand(0).getReg()))
+ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(1).getReg()))
+ .addReg(0).addImm(0);
+ break;
+
+ case SystemZ::LEFR:
+ LoweredMI = MCInstBuilder(SystemZ::VLVGF)
+ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg()))
+ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg()))
+ .addReg(MI->getOperand(1).getReg())
+ .addReg(0).addImm(0);
+ break;
+
+#define LOWER_LOW(NAME) \
+ case SystemZ::NAME##64: LoweredMI = lowerRILow(MI, SystemZ::NAME); break
+
+ LOWER_LOW(IILL);
+ LOWER_LOW(IILH);
+ LOWER_LOW(TMLL);
+ LOWER_LOW(TMLH);
+ LOWER_LOW(NILL);
+ LOWER_LOW(NILH);
+ LOWER_LOW(NILF);
+ LOWER_LOW(OILL);
+ LOWER_LOW(OILH);
+ LOWER_LOW(OILF);
+ LOWER_LOW(XILF);
+
+#undef LOWER_LOW
+
+#define LOWER_HIGH(NAME) \
+ case SystemZ::NAME##64: LoweredMI = lowerRIHigh(MI, SystemZ::NAME); break
+
+ LOWER_HIGH(IIHL);
+ LOWER_HIGH(IIHH);
+ LOWER_HIGH(TMHL);
+ LOWER_HIGH(TMHH);
+ LOWER_HIGH(NIHL);
+ LOWER_HIGH(NIHH);
+ LOWER_HIGH(NIHF);
+ LOWER_HIGH(OIHL);
+ LOWER_HIGH(OIHH);
+ LOWER_HIGH(OIHF);
+ LOWER_HIGH(XIHF);
+
+#undef LOWER_HIGH
+
+ case SystemZ::Serialize:
+ if (MF->getSubtarget<SystemZSubtarget>().hasFastSerialization())
+ LoweredMI = MCInstBuilder(SystemZ::BCRAsm)
+ .addImm(14).addReg(SystemZ::R0D);
+ else
+ LoweredMI = MCInstBuilder(SystemZ::BCRAsm)
+ .addImm(15).addReg(SystemZ::R0D);
+ break;
+
+ // Emit nothing here but a comment if we can.
+ case SystemZ::MemBarrier:
+ OutStreamer->emitRawComment("MEMBARRIER");
+ return;
+
+ // We want to emit "j .+2" for traps: the branch target is the relative
+ // immediate field of the jump instruction itself, which decodes as an
+ // illegal instruction. We cannot emit a "." symbol, so create and emit a
+ // temp label before the instruction and use that instead.
+ case SystemZ::Trap: {
+ MCSymbol *DotSym = OutContext.createTempSymbol();
+ OutStreamer->EmitLabel(DotSym);
+
+ const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(DotSym, OutContext);
+ const MCConstantExpr *ConstExpr = MCConstantExpr::create(2, OutContext);
+ LoweredMI = MCInstBuilder(SystemZ::J)
+ .addExpr(MCBinaryExpr::createAdd(Expr, ConstExpr, OutContext));
+ }
+ break;
+
+ // Conditional traps create a branch-on-condition instruction that jumps
+ // into the relative immediate field of the jump instruction (e.g. "jo .+2").
+ case SystemZ::CondTrap: {
+ MCSymbol *DotSym = OutContext.createTempSymbol();
+ OutStreamer->EmitLabel(DotSym);
+
+ const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(DotSym, OutContext);
+ const MCConstantExpr *ConstExpr = MCConstantExpr::create(2, OutContext);
+ LoweredMI = MCInstBuilder(SystemZ::BRC)
+ .addImm(MI->getOperand(0).getImm())
+ .addImm(MI->getOperand(1).getImm())
+ .addExpr(MCBinaryExpr::createAdd(Expr, ConstExpr, OutContext));
+ }
+ break;
+
+ default:
+ Lower.lower(MI, LoweredMI);
+ break;
+ }
+ EmitToStreamer(*OutStreamer, LoweredMI);
+}
+
+// Convert a SystemZ-specific constant pool modifier into the associated
+// MCSymbolRefExpr variant kind.
+static MCSymbolRefExpr::VariantKind
+getModifierVariantKind(SystemZCP::SystemZCPModifier Modifier) {
+ switch (Modifier) {
+ case SystemZCP::TLSGD: return MCSymbolRefExpr::VK_TLSGD;
+ case SystemZCP::TLSLDM: return MCSymbolRefExpr::VK_TLSLDM;
+ case SystemZCP::DTPOFF: return MCSymbolRefExpr::VK_DTPOFF;
+ case SystemZCP::NTPOFF: return MCSymbolRefExpr::VK_NTPOFF;
+ }
+ llvm_unreachable("Invalid SystemZCPModifier!");
+}
+
+void SystemZAsmPrinter::
+EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+ auto *ZCPV = static_cast<SystemZConstantPoolValue*>(MCPV);
+
+ const MCExpr *Expr =
+ MCSymbolRefExpr::create(getSymbol(ZCPV->getGlobalValue()),
+ getModifierVariantKind(ZCPV->getModifier()),
+ OutContext);
+ uint64_t Size = getDataLayout().getTypeAllocSize(ZCPV->getType());
+
+ OutStreamer->EmitValue(Expr, Size);
+}
+
+bool SystemZAsmPrinter::PrintAsmOperand(const MachineInstr *MI,
+ unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &OS) {
+ if (ExtraCode && *ExtraCode == 'n') {
+ if (!MI->getOperand(OpNo).isImm())
+ return true;
+ OS << -int64_t(MI->getOperand(OpNo).getImm());
+ } else {
+ SystemZMCInstLower Lower(MF->getContext(), *this);
+ MCOperand MO(Lower.lowerOperand(MI->getOperand(OpNo)));
+ SystemZInstPrinter::printOperand(MO, MAI, OS);
+ }
+ return false;
+}
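
For reference, the 'n' handling above mirrors GCC's generic 'n' operand
modifier, which prints the negated value of an integer constant. A
hypothetical use from C (not part of this change):

  // With 100 as an immediate operand, "%n1" prints -100, so this
  // would emit something like "afi %r?, -100".
  int x = 0;
  asm("afi %0, %n1" : "+r"(x) : "i"(100));
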
+
+bool SystemZAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &OS) {
+ SystemZInstPrinter::printAddress(MI->getOperand(OpNo).getReg(),
+ MI->getOperand(OpNo + 1).getImm(),
+ MI->getOperand(OpNo + 2).getReg(), OS);
+ return false;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeSystemZAsmPrinter() {
+ RegisterAsmPrinter<SystemZAsmPrinter> X(getTheSystemZTarget());
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
new file mode 100644
index 000000000000..fe8c88fe23e3
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
@@ -0,0 +1,42 @@
+//===-- SystemZAsmPrinter.h - SystemZ LLVM assembly printer ----*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZASMPRINTER_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZASMPRINTER_H
+
+#include "SystemZTargetMachine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+class MCStreamer;
+class MachineBasicBlock;
+class MachineInstr;
+class Module;
+class raw_ostream;
+
+class LLVM_LIBRARY_VISIBILITY SystemZAsmPrinter : public AsmPrinter {
+public:
+ SystemZAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)) {}
+
+ // Override AsmPrinter.
+ StringRef getPassName() const override { return "SystemZ Assembly Printer"; }
+ void EmitInstruction(const MachineInstr *MI) override;
+ void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) override;
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &OS) override;
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &OS) override;
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp
new file mode 100644
index 000000000000..72da51f74b10
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp
@@ -0,0 +1,21 @@
+//===-- SystemZCallingConv.cpp - Calling conventions for SystemZ ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZCallingConv.h"
+#include "SystemZRegisterInfo.h"
+
+using namespace llvm;
+
+const MCPhysReg SystemZ::ArgGPRs[SystemZ::NumArgGPRs] = {
+ SystemZ::R2D, SystemZ::R3D, SystemZ::R4D, SystemZ::R5D, SystemZ::R6D
+};
+
+const MCPhysReg SystemZ::ArgFPRs[SystemZ::NumArgFPRs] = {
+ SystemZ::F0D, SystemZ::F2D, SystemZ::F4D, SystemZ::F6D
+};
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.h b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.h
new file mode 100644
index 000000000000..b5523e586f4c
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.h
@@ -0,0 +1,130 @@
+//===-- SystemZCallingConv.h - Calling conventions for SystemZ --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZCALLINGCONV_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZCALLINGCONV_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/MC/MCRegisterInfo.h"
+
+namespace llvm {
+namespace SystemZ {
+ const unsigned NumArgGPRs = 5;
+ extern const MCPhysReg ArgGPRs[NumArgGPRs];
+
+ const unsigned NumArgFPRs = 4;
+ extern const MCPhysReg ArgFPRs[NumArgFPRs];
+} // end namespace SystemZ
+
+class SystemZCCState : public CCState {
+private:
+ /// Records whether the value was a fixed argument.
+ /// See ISD::OutputArg::IsFixed.
+ SmallVector<bool, 4> ArgIsFixed;
+
+ /// Records whether the value was widened from a short vector type.
+ SmallVector<bool, 4> ArgIsShortVector;
+
+ // Check whether ArgVT is a short vector type.
+ bool IsShortVectorType(EVT ArgVT) {
+ return ArgVT.isVector() && ArgVT.getStoreSize() <= 8;
+ }
+
+public:
+ SystemZCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
+ SmallVectorImpl<CCValAssign> &locs, LLVMContext &C)
+ : CCState(CC, isVarArg, MF, locs, C) {}
+
+ void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
+ CCAssignFn Fn) {
+ // Formal arguments are always fixed.
+ ArgIsFixed.clear();
+ for (unsigned i = 0; i < Ins.size(); ++i)
+ ArgIsFixed.push_back(true);
+ // Record whether the call operand was a short vector.
+ ArgIsShortVector.clear();
+ for (unsigned i = 0; i < Ins.size(); ++i)
+ ArgIsShortVector.push_back(IsShortVectorType(Ins[i].ArgVT));
+
+ CCState::AnalyzeFormalArguments(Ins, Fn);
+ }
+
+ void AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ CCAssignFn Fn) {
+ // Record whether the call operand was a fixed argument.
+ ArgIsFixed.clear();
+ for (unsigned i = 0; i < Outs.size(); ++i)
+ ArgIsFixed.push_back(Outs[i].IsFixed);
+ // Record whether the call operand was a short vector.
+ ArgIsShortVector.clear();
+ for (unsigned i = 0; i < Outs.size(); ++i)
+ ArgIsShortVector.push_back(IsShortVectorType(Outs[i].ArgVT));
+
+ CCState::AnalyzeCallOperands(Outs, Fn);
+ }
+
+ // This version of AnalyzeCallOperands in the base class is not usable
+ // since we must provide a means of accessing ISD::OutputArg::IsFixed.
+ void AnalyzeCallOperands(const SmallVectorImpl<MVT> &Outs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
+ CCAssignFn Fn) = delete;
+
+ bool IsFixed(unsigned ValNo) { return ArgIsFixed[ValNo]; }
+ bool IsShortVector(unsigned ValNo) { return ArgIsShortVector[ValNo]; }
+};
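
An illustration of the short-vector test (the types are chosen for the
example only):

  // A v4i8 stores in 4 bytes, so it counts as short and is later
  // bitconverted to i64 for stack passing; v4i32 stores in 16 bytes.
  bool classifyExampleTypes() {
    EVT Small = EVT(MVT::v4i8);  // getStoreSize() == 4  -> short vector
    EVT Wide = EVT(MVT::v4i32);  // getStoreSize() == 16 -> full vector
    return Small.getStoreSize() <= 8 && !(Wide.getStoreSize() <= 8);
  }
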
+
+// Handle i128 argument types. These need to be passed by implicit
+// reference. This could be as simple as the following .td line:
+// CCIfType<[i128], CCPassIndirect<i64>>,
+// except that i128 is not a legal type, and therefore gets split by
+// common code into a pair of i64 arguments.
+inline bool CC_SystemZ_I128Indirect(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
+
+ // ArgFlags.isSplit() is true on the first part of an i128 argument;
+ // PendingMembers.empty() is false on all subsequent parts.
+ if (!ArgFlags.isSplit() && PendingMembers.empty())
+ return false;
+
+ // Push a pending Indirect value location for each part.
+ LocVT = MVT::i64;
+ LocInfo = CCValAssign::Indirect;
+ PendingMembers.push_back(CCValAssign::getPending(ValNo, ValVT,
+ LocVT, LocInfo));
+ if (!ArgFlags.isSplitEnd())
+ return true;
+
+ // OK, we've collected all parts in the pending list. Allocate
+ // the location (register or stack slot) for the indirect pointer.
+ // (This duplicates the usual i64 calling convention rules.)
+ unsigned Reg = State.AllocateReg(SystemZ::ArgGPRs);
+ unsigned Offset = Reg ? 0 : State.AllocateStack(8, 8);
+
+ // Use that same location for all the pending parts.
+ for (auto &It : PendingMembers) {
+ if (Reg)
+ It.convertToReg(Reg);
+ else
+ It.convertToMem(Offset);
+ State.addLoc(It);
+ }
+
+ PendingMembers.clear();
+
+ return true;
+}
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td
new file mode 100644
index 000000000000..2bf5ac29865f
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td
@@ -0,0 +1,122 @@
+//=- SystemZCallingConv.td - Calling conventions for SystemZ -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for the SystemZ ABI.
+//===----------------------------------------------------------------------===//
+
+class CCIfExtend<CCAction A>
+ : CCIf<"ArgFlags.isSExt() || ArgFlags.isZExt()", A>;
+
+class CCIfSubtarget<string F, CCAction A>
+ : CCIf<!strconcat("static_cast<const SystemZSubtarget&>"
+ "(State.getMachineFunction().getSubtarget()).", F),
+ A>;
+
+// Match if this specific argument is a fixed (i.e. named) argument.
+class CCIfFixed<CCAction A>
+ : CCIf<"static_cast<SystemZCCState *>(&State)->IsFixed(ValNo)", A>;
+
+// Match if this specific argument was widened from a short vector type.
+class CCIfShortVector<CCAction A>
+ : CCIf<"static_cast<SystemZCCState *>(&State)->IsShortVector(ValNo)", A>;
+
+
+//===----------------------------------------------------------------------===//
+// z/Linux return value calling convention
+//===----------------------------------------------------------------------===//
+def RetCC_SystemZ : CallingConv<[
+ // Promote i32 to i64 if it has an explicit extension type.
+ CCIfType<[i32], CCIfExtend<CCPromoteToType<i64>>>,
+
+ // A SwiftError is returned in R9.
+ CCIfSwiftError<CCIfType<[i64], CCAssignToReg<[R9D]>>>,
+
+ // ABI-compliant code returns 64-bit integers in R2. Make the other
+ // call-clobbered argument registers available for code that doesn't
+ // care about the ABI. (R6 is an argument register too, but is
+ // call-saved and therefore not suitable for return values.)
+ CCIfType<[i32], CCAssignToReg<[R2L, R3L, R4L, R5L]>>,
+ CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D]>>,
+
+ // ABI-compliant code returns float and double in F0. Make the
+ // other floating-point argument registers available for code that
+ // doesn't care about the ABI. All floating-point argument registers
+ // are call-clobbered, so we can use all of them here.
+ CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
+ CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>,
+
+ // Similarly for vectors, with V24 being the ABI-compliant choice.
+ // Sub-128 vectors are returned in the same way, but they're widened
+ // to one of these types during type legalization.
+ CCIfSubtarget<"hasVector()",
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[V24, V26, V28, V30, V25, V27, V29, V31]>>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// z/Linux argument calling conventions
+//===----------------------------------------------------------------------===//
+def CC_SystemZ : CallingConv<[
+ // Promote i32 to i64 if it has an explicit extension type.
+ // The convention is that true integer arguments that are smaller
+ // than 64 bits should be marked as extended, but structures that
+ // are smaller than 64 bits shouldn't.
+ CCIfType<[i32], CCIfExtend<CCPromoteToType<i64>>>,
+
+ // A SwiftSelf is passed in callee-saved R10.
+ CCIfSwiftSelf<CCIfType<[i64], CCAssignToReg<[R10D]>>>,
+
+ // A SwiftError is passed in callee-saved R9.
+ CCIfSwiftError<CCIfType<[i64], CCAssignToReg<[R9D]>>>,
+
+ // Force long double values to the stack and pass i64 pointers to them.
+ CCIfType<[f128], CCPassIndirect<i64>>,
+ // Same for i128 values. These are already split into two i64 here,
+ // so we have to use a custom handler.
+ CCIfType<[i64], CCCustom<"CC_SystemZ_I128Indirect">>,
+
+ // The first 5 integer arguments are passed in R2-R6. Note that R6
+ // is call-saved.
+ CCIfType<[i32], CCAssignToReg<[R2L, R3L, R4L, R5L, R6L]>>,
+ CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D, R6D]>>,
+
+ // The first 4 float and double arguments are passed in even registers F0-F6.
+ CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
+ CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>,
+
+ // The first 8 named vector arguments are passed in V24-V31. Sub-128 vectors
+ // are passed in the same way, but they're widened to one of these types
+ // during type legalization.
+ CCIfSubtarget<"hasVector()",
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfFixed<CCAssignToReg<[V24, V26, V28, V30,
+ V25, V27, V29, V31]>>>>,
+
+ // However, sub-128 vectors which need to go on the stack occupy just a
+ // single 8-byte-aligned 8-byte stack slot. Pass as i64.
+ CCIfSubtarget<"hasVector()",
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfShortVector<CCBitConvertToType<i64>>>>,
+
+ // Other vector arguments are passed in 8-byte-aligned 16-byte stack slots.
+ CCIfSubtarget<"hasVector()",
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToStack<16, 8>>>,
+
+ // Other arguments are passed in 8-byte-aligned 8-byte stack slots.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// z/Linux callee-saved registers
+//===----------------------------------------------------------------------===//
+def CSR_SystemZ : CalleeSavedRegs<(add (sequence "R%dD", 6, 15),
+ (sequence "F%dD", 8, 15))>;
+
+// R9 is used to return SwiftError; remove it from CSR.
+def CSR_SystemZ_SwiftError : CalleeSavedRegs<(sub CSR_SystemZ, R9D)>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
new file mode 100644
index 000000000000..4a6beb67f182
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
@@ -0,0 +1,52 @@
+//===-- SystemZConstantPoolValue.cpp - SystemZ constant-pool value --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZConstantPoolValue.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+SystemZConstantPoolValue::
+SystemZConstantPoolValue(const GlobalValue *gv,
+ SystemZCP::SystemZCPModifier modifier)
+ : MachineConstantPoolValue(gv->getType()), GV(gv), Modifier(modifier) {}
+
+SystemZConstantPoolValue *
+SystemZConstantPoolValue::Create(const GlobalValue *GV,
+ SystemZCP::SystemZCPModifier Modifier) {
+ return new SystemZConstantPoolValue(GV, Modifier);
+}
+
+int SystemZConstantPoolValue::
+getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) {
+ unsigned AlignMask = Alignment - 1;
+ const std::vector<MachineConstantPoolEntry> &Constants = CP->getConstants();
+ for (unsigned I = 0, E = Constants.size(); I != E; ++I) {
+ if (Constants[I].isMachineConstantPoolEntry() &&
+ (Constants[I].getAlignment() & AlignMask) == 0) {
+ auto *ZCPV =
+ static_cast<SystemZConstantPoolValue *>(Constants[I].Val.MachineCPVal);
+ if (ZCPV->GV == GV && ZCPV->Modifier == Modifier)
+ return I;
+ }
+ }
+ return -1;
+}
+
+void SystemZConstantPoolValue::addSelectionDAGCSEId(FoldingSetNodeID &ID) {
+ ID.AddPointer(GV);
+ ID.AddInteger(Modifier);
+}
+
+void SystemZConstantPoolValue::print(raw_ostream &O) const {
+ O << GV << "@" << int(Modifier);
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h
new file mode 100644
index 000000000000..a71b595560d2
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h
@@ -0,0 +1,58 @@
+//===- SystemZConstantPoolValue.h - SystemZ constant-pool value -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZCONSTANTPOOLVALUE_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZCONSTANTPOOLVALUE_H
+
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+
+class GlobalValue;
+
+namespace SystemZCP {
+enum SystemZCPModifier {
+ TLSGD,
+ TLSLDM,
+ DTPOFF,
+ NTPOFF
+};
+} // end namespace SystemZCP
+
+/// A SystemZ-specific constant pool value. At present, the only
+/// defined constant pool values are module IDs or offsets of
+/// thread-local variables (written x@TLSGD, x@TLSLDM, x@DTPOFF,
+/// or x@NTPOFF).
+class SystemZConstantPoolValue : public MachineConstantPoolValue {
+ const GlobalValue *GV;
+ SystemZCP::SystemZCPModifier Modifier;
+
+protected:
+ SystemZConstantPoolValue(const GlobalValue *GV,
+ SystemZCP::SystemZCPModifier Modifier);
+
+public:
+ static SystemZConstantPoolValue *
+ Create(const GlobalValue *GV, SystemZCP::SystemZCPModifier Modifier);
+
+ // Override MachineConstantPoolValue.
+ int getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment) override;
+ void addSelectionDAGCSEId(FoldingSetNodeID &ID) override;
+ void print(raw_ostream &O) const override;
+
+ // Access SystemZ-specific fields.
+ const GlobalValue *getGlobalValue() const { return GV; }
+ SystemZCP::SystemZCPModifier getModifier() const { return Modifier; }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
new file mode 100644
index 000000000000..d70f9e90cd3e
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -0,0 +1,582 @@
+//===-- SystemZElimCompare.cpp - Eliminate comparison instructions --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass:
+// (1) tries to remove compares if CC already contains the required information
+// (2) fuses compares and branches into COMPARE AND BRANCH instructions
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZTargetMachine.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <cstdint>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "systemz-elim-compare"
+
+STATISTIC(BranchOnCounts, "Number of branch-on-count instructions");
+STATISTIC(LoadAndTraps, "Number of load-and-trap instructions");
+STATISTIC(EliminatedComparisons, "Number of eliminated comparisons");
+STATISTIC(FusedComparisons, "Number of fused compare-and-branch instructions");
+
+namespace {
+
+// Represents the references to a particular register in one or more
+// instructions.
+struct Reference {
+ Reference() = default;
+
+ Reference &operator|=(const Reference &Other) {
+ Def |= Other.Def;
+ Use |= Other.Use;
+ return *this;
+ }
+
+ explicit operator bool() const { return Def || Use; }
+
+ // True if the register is defined or used in some form, either directly or
+ // via a sub- or super-register.
+ bool Def = false;
+ bool Use = false;
+};
+
+class SystemZElimCompare : public MachineFunctionPass {
+public:
+ static char ID;
+
+ SystemZElimCompare(const SystemZTargetMachine &tm)
+ : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override {
+ return "SystemZ Comparison Elimination";
+ }
+
+ bool processBlock(MachineBasicBlock &MBB);
+ bool runOnMachineFunction(MachineFunction &F) override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+private:
+ Reference getRegReferences(MachineInstr &MI, unsigned Reg);
+ bool convertToBRCT(MachineInstr &MI, MachineInstr &Compare,
+ SmallVectorImpl<MachineInstr *> &CCUsers);
+ bool convertToLoadAndTrap(MachineInstr &MI, MachineInstr &Compare,
+ SmallVectorImpl<MachineInstr *> &CCUsers);
+ bool convertToLoadAndTest(MachineInstr &MI);
+ bool adjustCCMasksForInstr(MachineInstr &MI, MachineInstr &Compare,
+ SmallVectorImpl<MachineInstr *> &CCUsers);
+ bool optimizeCompareZero(MachineInstr &Compare,
+ SmallVectorImpl<MachineInstr *> &CCUsers);
+ bool fuseCompareOperations(MachineInstr &Compare,
+ SmallVectorImpl<MachineInstr *> &CCUsers);
+
+ const SystemZInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+};
+
+char SystemZElimCompare::ID = 0;
+
+} // end anonymous namespace
+
+// Return true if CC is live out of MBB.
+static bool isCCLiveOut(MachineBasicBlock &MBB) {
+ for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI)
+ if ((*SI)->isLiveIn(SystemZ::CC))
+ return true;
+ return false;
+}
+
+// Return true if any CC result of MI would reflect the value of Reg.
+static bool resultTests(MachineInstr &MI, unsigned Reg) {
+ if (MI.getNumOperands() > 0 && MI.getOperand(0).isReg() &&
+ MI.getOperand(0).isDef() && MI.getOperand(0).getReg() == Reg)
+ return true;
+
+ switch (MI.getOpcode()) {
+ case SystemZ::LR:
+ case SystemZ::LGR:
+ case SystemZ::LGFR:
+ case SystemZ::LTR:
+ case SystemZ::LTGR:
+ case SystemZ::LTGFR:
+ case SystemZ::LER:
+ case SystemZ::LDR:
+ case SystemZ::LXR:
+ case SystemZ::LTEBR:
+ case SystemZ::LTDBR:
+ case SystemZ::LTXBR:
+ if (MI.getOperand(1).getReg() == Reg)
+ return true;
+ }
+
+ return false;
+}
+
+// Describe the references to Reg or any of its aliases in MI.
+Reference SystemZElimCompare::getRegReferences(MachineInstr &MI, unsigned Reg) {
+ Reference Ref;
+ for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI.getOperand(I);
+ if (MO.isReg()) {
+ if (unsigned MOReg = MO.getReg()) {
+ if (TRI->regsOverlap(MOReg, Reg)) {
+ if (MO.isUse())
+ Ref.Use = true;
+ else if (MO.isDef())
+ Ref.Def = true;
+ }
+ }
+ }
+ }
+ return Ref;
+}
+
+// Return true if this is a load-and-test which can be optimized in the
+// same way as a compare instruction.
+static bool isLoadAndTestAsCmp(MachineInstr &MI) {
+ // If during isel we used a load-and-test as a compare with 0, the
+ // def operand is dead.
+ return (MI.getOpcode() == SystemZ::LTEBR ||
+ MI.getOpcode() == SystemZ::LTDBR ||
+ MI.getOpcode() == SystemZ::LTXBR) &&
+ MI.getOperand(0).isDead();
+}
+
+// Return the source register of Compare, which is the unknown value
+// being tested.
+static unsigned getCompareSourceReg(MachineInstr &Compare) {
+ unsigned reg = 0;
+ if (Compare.isCompare())
+ reg = Compare.getOperand(0).getReg();
+ else if (isLoadAndTestAsCmp(Compare))
+ reg = Compare.getOperand(1).getReg();
+ assert(reg);
+
+ return reg;
+}
+
+// Compare compares the result of MI against zero. If MI is an addition
+// of -1 and if CCUsers is a single branch on nonzero, eliminate the addition
+// and convert the branch to a BRCT(G) or BRCTH. Return true on success.
+bool SystemZElimCompare::convertToBRCT(
+ MachineInstr &MI, MachineInstr &Compare,
+ SmallVectorImpl<MachineInstr *> &CCUsers) {
+ // Check whether we have an addition of -1.
+ unsigned Opcode = MI.getOpcode();
+ unsigned BRCT;
+ if (Opcode == SystemZ::AHI)
+ BRCT = SystemZ::BRCT;
+ else if (Opcode == SystemZ::AGHI)
+ BRCT = SystemZ::BRCTG;
+ else if (Opcode == SystemZ::AIH)
+ BRCT = SystemZ::BRCTH;
+ else
+ return false;
+ if (MI.getOperand(2).getImm() != -1)
+ return false;
+
+ // Check whether we have a single JLH.
+ if (CCUsers.size() != 1)
+ return false;
+ MachineInstr *Branch = CCUsers[0];
+ if (Branch->getOpcode() != SystemZ::BRC ||
+ Branch->getOperand(0).getImm() != SystemZ::CCMASK_ICMP ||
+ Branch->getOperand(1).getImm() != SystemZ::CCMASK_CMP_NE)
+ return false;
+
+ // We already know that there are no references to the register between
+ // MI and Compare. Make sure that there are also no references between
+ // Compare and Branch.
+ unsigned SrcReg = getCompareSourceReg(Compare);
+ MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch;
+ for (++MBBI; MBBI != MBBE; ++MBBI)
+ if (getRegReferences(*MBBI, SrcReg))
+ return false;
+
+ // The transformation is OK. Rebuild Branch as a BRCT(G) or BRCTH.
+ MachineOperand Target(Branch->getOperand(2));
+ while (Branch->getNumOperands())
+ Branch->RemoveOperand(0);
+ Branch->setDesc(TII->get(BRCT));
+ MachineInstrBuilder MIB(*Branch->getParent()->getParent(), Branch);
+ MIB.add(MI.getOperand(0)).add(MI.getOperand(1)).add(Target);
+ // Add a CC def to BRCT(G), since we may have to split them again if the
+ // branch displacement overflows. BRCTH has a 32-bit displacement, so
+ // this is not necessary there.
+ if (BRCT != SystemZ::BRCTH)
+ MIB.addReg(SystemZ::CC, RegState::ImplicitDefine | RegState::Dead);
+ MI.eraseFromParent();
+ return true;
+}
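
Schematically, the rewrite looks like this (a hedged sketch, not literal
pass output):

  // Before:
  //   ahi   %r2, -1        # MI: addition of -1
  //   chi   %r2, 0         # Compare against zero (erased by the caller)
  //   jlh   .Ltarget       # single BRC user on CC != 0
  // After:
  //   brct  %r2, .Ltarget  # decrement and branch if the result is nonzero
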
+
+// Compare compares the result of MI against zero. If MI is a suitable load
+// instruction and if CCUsers is a single conditional trap on zero, eliminate
+// the load and convert the branch to a load-and-trap. Return true on success.
+bool SystemZElimCompare::convertToLoadAndTrap(
+ MachineInstr &MI, MachineInstr &Compare,
+ SmallVectorImpl<MachineInstr *> &CCUsers) {
+ unsigned LATOpcode = TII->getLoadAndTrap(MI.getOpcode());
+ if (!LATOpcode)
+ return false;
+
+ // Check whether we have a single CondTrap that traps on zero.
+ if (CCUsers.size() != 1)
+ return false;
+ MachineInstr *Branch = CCUsers[0];
+ if (Branch->getOpcode() != SystemZ::CondTrap ||
+ Branch->getOperand(0).getImm() != SystemZ::CCMASK_ICMP ||
+ Branch->getOperand(1).getImm() != SystemZ::CCMASK_CMP_EQ)
+ return false;
+
+ // We already know that there are no references to the register between
+ // MI and Compare. Make sure that there are also no references between
+ // Compare and Branch.
+ unsigned SrcReg = getCompareSourceReg(Compare);
+ MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch;
+ for (++MBBI; MBBI != MBBE; ++MBBI)
+ if (getRegReferences(*MBBI, SrcReg))
+ return false;
+
+ // The transformation is OK. Rebuild Branch as a load-and-trap.
+ while (Branch->getNumOperands())
+ Branch->RemoveOperand(0);
+ Branch->setDesc(TII->get(LATOpcode));
+ MachineInstrBuilder(*Branch->getParent()->getParent(), Branch)
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3));
+ MI.eraseFromParent();
+ return true;
+}
+
+// If MI is a load instruction, try to convert it into a LOAD AND TEST.
+// Return true on success.
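+//
+// Schematically, with illustrative operands:
+//
+//   l %r2, 0(%r3); chi %r2, 0    =>    lt %r2, 0(%r3)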
+bool SystemZElimCompare::convertToLoadAndTest(MachineInstr &MI) {
+ unsigned Opcode = TII->getLoadAndTest(MI.getOpcode());
+ if (!Opcode)
+ return false;
+
+ MI.setDesc(TII->get(Opcode));
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI)
+ .addReg(SystemZ::CC, RegState::ImplicitDefine);
+ return true;
+}
+
+// The CC users in CCUsers are testing the result of a comparison of some
+// value X against zero and we know that any CC value produced by MI
+// would also reflect the value of X. Try to adjust CCUsers so that
+// they test the result of MI directly, returning true on success.
+// Leave everything unchanged on failure.
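+//
+// Schematically: if MI already sets CC in a way that encodes "X compared
+// with zero" for some subset of CC values (ReusableCCMask), each user's
+// CCValid/CCMask pair is rewritten from compare-style masks to MI's
+// native CC values, after which Compare itself becomes dead.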
+bool SystemZElimCompare::adjustCCMasksForInstr(
+ MachineInstr &MI, MachineInstr &Compare,
+ SmallVectorImpl<MachineInstr *> &CCUsers) {
+ int Opcode = MI.getOpcode();
+ const MCInstrDesc &Desc = TII->get(Opcode);
+ unsigned MIFlags = Desc.TSFlags;
+
+ // See which compare-style condition codes are available.
+ unsigned ReusableCCMask = SystemZII::getCompareZeroCCMask(MIFlags);
+
+ // For unsigned comparisons with zero, only equality makes sense.
+ unsigned CompareFlags = Compare.getDesc().TSFlags;
+ if (CompareFlags & SystemZII::IsLogical)
+ ReusableCCMask &= SystemZ::CCMASK_CMP_EQ;
+
+ if (ReusableCCMask == 0)
+ return false;
+
+ unsigned CCValues = SystemZII::getCCValues(MIFlags);
+ assert((ReusableCCMask & ~CCValues) == 0 && "Invalid CCValues");
+
+ // Now check whether these flags are enough for all users.
+ SmallVector<MachineOperand *, 4> AlterMasks;
+ for (unsigned int I = 0, E = CCUsers.size(); I != E; ++I) {
+ MachineInstr *MI = CCUsers[I];
+
+ // Fail if this isn't a use of CC that we understand.
+ unsigned Flags = MI->getDesc().TSFlags;
+ unsigned FirstOpNum;
+ if (Flags & SystemZII::CCMaskFirst)
+ FirstOpNum = 0;
+ else if (Flags & SystemZII::CCMaskLast)
+ FirstOpNum = MI->getNumExplicitOperands() - 2;
+ else
+ return false;
+
+ // Check whether the instruction predicate treats all CC values
+ // outside of ReusableCCMask in the same way. In that case it
+ // doesn't matter what those CC values mean.
+ unsigned CCValid = MI->getOperand(FirstOpNum).getImm();
+ unsigned CCMask = MI->getOperand(FirstOpNum + 1).getImm();
+ unsigned OutValid = ~ReusableCCMask & CCValid;
+ unsigned OutMask = ~ReusableCCMask & CCMask;
+ if (OutMask != 0 && OutMask != OutValid)
+ return false;
+
+ AlterMasks.push_back(&MI->getOperand(FirstOpNum));
+ AlterMasks.push_back(&MI->getOperand(FirstOpNum + 1));
+ }
+
+ // All users are OK. Adjust the masks for MI.
+ for (unsigned I = 0, E = AlterMasks.size(); I != E; I += 2) {
+ AlterMasks[I]->setImm(CCValues);
+ unsigned CCMask = AlterMasks[I + 1]->getImm();
+ if (CCMask & ~ReusableCCMask)
+ AlterMasks[I + 1]->setImm((CCMask & ReusableCCMask) |
+ (CCValues & ~ReusableCCMask));
+ }
+
+ // CC is now live after MI.
+ int CCDef = MI.findRegisterDefOperandIdx(SystemZ::CC, false, true, TRI);
+ assert(CCDef >= 0 && "Couldn't find CC set");
+ MI.getOperand(CCDef).setIsDead(false);
+
+ // Clear any intervening kills of CC.
+ MachineBasicBlock::iterator MBBI = MI, MBBE = Compare;
+ for (++MBBI; MBBI != MBBE; ++MBBI)
+ MBBI->clearRegisterKills(SystemZ::CC, TRI);
+
+ return true;
+}
+
+// Return true if Compare is a comparison against zero.
+static bool isCompareZero(MachineInstr &Compare) {
+ switch (Compare.getOpcode()) {
+ case SystemZ::LTEBRCompare:
+ case SystemZ::LTDBRCompare:
+ case SystemZ::LTXBRCompare:
+ return true;
+
+ default:
+ if (isLoadAndTestAsCmp(Compare))
+ return true;
+ return Compare.getNumExplicitOperands() == 2 &&
+ Compare.getOperand(1).isImm() && Compare.getOperand(1).getImm() == 0;
+ }
+}
+
+// Try to optimize cases where comparison instruction Compare is testing
+// a value against zero. Return true on success and if Compare should be
+// deleted as dead. CCUsers is the list of instructions that use the CC
+// value produced by Compare.
+bool SystemZElimCompare::optimizeCompareZero(
+ MachineInstr &Compare, SmallVectorImpl<MachineInstr *> &CCUsers) {
+ if (!isCompareZero(Compare))
+ return false;
+
+ // Search back for CC results that are based on the first operand.
+ unsigned SrcReg = getCompareSourceReg(Compare);
+ MachineBasicBlock &MBB = *Compare.getParent();
+ MachineBasicBlock::iterator MBBI = Compare, MBBE = MBB.begin();
+ Reference CCRefs;
+ Reference SrcRefs;
+ while (MBBI != MBBE) {
+ --MBBI;
+ MachineInstr &MI = *MBBI;
+ if (resultTests(MI, SrcReg)) {
+ // Try to remove both MI and Compare by converting a branch to BRCT(G)
+ // or a load-and-trap instruction. We don't care in this case whether
+ // CC is modified between MI and Compare.
+ if (!CCRefs.Use && !SrcRefs) {
+ if (convertToBRCT(MI, Compare, CCUsers)) {
+ BranchOnCounts += 1;
+ return true;
+ }
+ if (convertToLoadAndTrap(MI, Compare, CCUsers)) {
+ LoadAndTraps += 1;
+ return true;
+ }
+ }
+ // Try to eliminate Compare by reusing a CC result from MI.
+ if ((!CCRefs && convertToLoadAndTest(MI)) ||
+ (!CCRefs.Def && adjustCCMasksForInstr(MI, Compare, CCUsers))) {
+ EliminatedComparisons += 1;
+ return true;
+ }
+ }
+ SrcRefs |= getRegReferences(MI, SrcReg);
+ if (SrcRefs.Def)
+ return false;
+ CCRefs |= getRegReferences(MI, SystemZ::CC);
+ if (CCRefs.Use && CCRefs.Def)
+ return false;
+ }
+ return false;
+}
+
+// Try to fuse comparison instruction Compare into a later branch.
+// Return true on success and if Compare is therefore redundant.
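+//
+// Schematically, with illustrative operands, a compare followed by a
+// conditional branch becomes a single compare-and-branch:
+//
+//   cr %r2, %r3; brc <mask>, label    =>    crj %r2, %r3, <mask>, label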
+bool SystemZElimCompare::fuseCompareOperations(
+ MachineInstr &Compare, SmallVectorImpl<MachineInstr *> &CCUsers) {
+ // See whether we have a single branch with which to fuse.
+ if (CCUsers.size() != 1)
+ return false;
+ MachineInstr *Branch = CCUsers[0];
+ SystemZII::FusedCompareType Type;
+ switch (Branch->getOpcode()) {
+ case SystemZ::BRC:
+ Type = SystemZII::CompareAndBranch;
+ break;
+ case SystemZ::CondReturn:
+ Type = SystemZII::CompareAndReturn;
+ break;
+ case SystemZ::CallBCR:
+ Type = SystemZII::CompareAndSibcall;
+ break;
+ case SystemZ::CondTrap:
+ Type = SystemZII::CompareAndTrap;
+ break;
+ default:
+ return false;
+ }
+
+ // See whether we have a comparison that can be fused.
+ unsigned FusedOpcode =
+ TII->getFusedCompare(Compare.getOpcode(), Type, &Compare);
+ if (!FusedOpcode)
+ return false;
+
+ // Make sure that the operands are available at the branch.
+ // SrcReg2 is the register if the source operand is a register,
+ // 0 if the source operand is immediate, and the base register
+ // if the source operand is memory (index is not supported).
+ unsigned SrcReg = Compare.getOperand(0).getReg();
+ unsigned SrcReg2 =
+ Compare.getOperand(1).isReg() ? Compare.getOperand(1).getReg() : 0;
+ MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch;
+ for (++MBBI; MBBI != MBBE; ++MBBI)
+ if (MBBI->modifiesRegister(SrcReg, TRI) ||
+ (SrcReg2 && MBBI->modifiesRegister(SrcReg2, TRI)))
+ return false;
+
+ // Read the branch mask, target (if applicable), regmask (if applicable).
+ MachineOperand CCMask(MBBI->getOperand(1));
+ assert((CCMask.getImm() & ~SystemZ::CCMASK_ICMP) == 0 &&
+ "Invalid condition-code mask for integer comparison");
+ // This is only valid for CompareAndBranch.
+ MachineOperand Target(MBBI->getOperand(
+ Type == SystemZII::CompareAndBranch ? 2 : 0));
+ const uint32_t *RegMask;
+ if (Type == SystemZII::CompareAndSibcall)
+ RegMask = MBBI->getOperand(2).getRegMask();
+
+ // Clear out all current operands.
+ int CCUse = MBBI->findRegisterUseOperandIdx(SystemZ::CC, false, TRI);
+ assert(CCUse >= 0 && "BRC/BCR must use CC");
+ Branch->RemoveOperand(CCUse);
+ // Remove target (branch) or regmask (sibcall).
+ if (Type == SystemZII::CompareAndBranch ||
+ Type == SystemZII::CompareAndSibcall)
+ Branch->RemoveOperand(2);
+ Branch->RemoveOperand(1);
+ Branch->RemoveOperand(0);
+
+ // Rebuild Branch as a fused compare and branch.
+ // SrcNOps is the number of MI operands of the compare instruction
+ // that we need to copy over.
+ unsigned SrcNOps = 2;
+ if (FusedOpcode == SystemZ::CLT || FusedOpcode == SystemZ::CLGT)
+ SrcNOps = 3;
+ Branch->setDesc(TII->get(FusedOpcode));
+ MachineInstrBuilder MIB(*Branch->getParent()->getParent(), Branch);
+ for (unsigned I = 0; I < SrcNOps; I++)
+ MIB.add(Compare.getOperand(I));
+ MIB.add(CCMask);
+
+ if (Type == SystemZII::CompareAndBranch) {
+ // Only conditional branches define CC, as they may be converted back
+ // to a non-fused branch because of a long displacement. Conditional
+ // returns don't have that problem.
+ MIB.add(Target).addReg(SystemZ::CC,
+ RegState::ImplicitDefine | RegState::Dead);
+ }
+
+ if (Type == SystemZII::CompareAndSibcall)
+ MIB.addRegMask(RegMask);
+
+ // Clear any intervening kills of SrcReg and SrcReg2.
+ MBBI = Compare;
+ for (++MBBI; MBBI != MBBE; ++MBBI) {
+ MBBI->clearRegisterKills(SrcReg, TRI);
+ if (SrcReg2)
+ MBBI->clearRegisterKills(SrcReg2, TRI);
+ }
+ FusedComparisons += 1;
+ return true;
+}
+
+// Process all comparison instructions in MBB. Return true if something
+// changed.
+bool SystemZElimCompare::processBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ // Walk backwards through the block looking for comparisons, recording
+ // all CC users as we go. The subroutines can delete Compare and
+ // instructions before it.
+ bool CompleteCCUsers = !isCCLiveOut(MBB);
+ SmallVector<MachineInstr *, 4> CCUsers;
+ MachineBasicBlock::iterator MBBI = MBB.end();
+ while (MBBI != MBB.begin()) {
+ MachineInstr &MI = *--MBBI;
+ if (CompleteCCUsers && (MI.isCompare() || isLoadAndTestAsCmp(MI)) &&
+ (optimizeCompareZero(MI, CCUsers) ||
+ fuseCompareOperations(MI, CCUsers))) {
+ ++MBBI;
+ MI.eraseFromParent();
+ Changed = true;
+ CCUsers.clear();
+ continue;
+ }
+
+ if (MI.definesRegister(SystemZ::CC)) {
+ CCUsers.clear();
+ CompleteCCUsers = true;
+ }
+ if (MI.readsRegister(SystemZ::CC) && CompleteCCUsers)
+ CCUsers.push_back(&MI);
+ }
+ return Changed;
+}
+
+bool SystemZElimCompare::runOnMachineFunction(MachineFunction &F) {
+ if (skipFunction(*F.getFunction()))
+ return false;
+
+ TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo());
+ TRI = &TII->getRegisterInfo();
+
+ bool Changed = false;
+ for (auto &MBB : F)
+ Changed |= processBlock(MBB);
+
+ return Changed;
+}
+
+FunctionPass *llvm::createSystemZElimComparePass(SystemZTargetMachine &TM) {
+ return new SystemZElimCompare(TM);
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZExpandPseudo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZExpandPseudo.cpp
new file mode 100644
index 000000000000..d02db9a617a3
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZExpandPseudo.cpp
@@ -0,0 +1,153 @@
+//==-- SystemZExpandPseudo.cpp - Expand pseudo instructions -------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that expands pseudo instructions into target
+// instructions to allow proper scheduling and other late optimizations. This
+// pass should be run after register allocation but before the post-regalloc
+// scheduling pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZSubtarget.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+using namespace llvm;
+
+#define SYSTEMZ_EXPAND_PSEUDO_NAME "SystemZ pseudo instruction expansion pass"
+
+namespace llvm {
+ void initializeSystemZExpandPseudoPass(PassRegistry&);
+}
+
+namespace {
+class SystemZExpandPseudo : public MachineFunctionPass {
+public:
+ static char ID;
+ SystemZExpandPseudo() : MachineFunctionPass(ID) {
+ initializeSystemZExpandPseudoPass(*PassRegistry::getPassRegistry());
+ }
+
+ const SystemZInstrInfo *TII;
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ StringRef getPassName() const override { return SYSTEMZ_EXPAND_PSEUDO_NAME; }
+
+private:
+ bool expandMBB(MachineBasicBlock &MBB);
+ bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI);
+ bool expandLOCRMux(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI);
+};
+char SystemZExpandPseudo::ID = 0;
+}
+
+INITIALIZE_PASS(SystemZExpandPseudo, "systemz-expand-pseudo",
+ SYSTEMZ_EXPAND_PSEUDO_NAME, false, false)
+
+/// \brief Returns an instance of the pseudo instruction expansion pass.
+FunctionPass *llvm::createSystemZExpandPseudoPass(SystemZTargetMachine &TM) {
+ return new SystemZExpandPseudo();
+}
+
+// MI is a load-register-on-condition pseudo instruction that could not be
+// handled as a single hardware instruction. Replace it by a branch sequence.
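+//
+// The resulting control flow is, schematically:
+//
+//   MBB:     ...; BRC CCValid, CCMask^CCValid, RestMBB  (branch if false)
+//   MoveMBB: copy SrcReg into DestReg                   (fall through)
+//   RestMBB: the remainder of the original block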
+bool SystemZExpandPseudo::expandLOCRMux(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI) {
+ MachineFunction &MF = *MBB.getParent();
+ const BasicBlock *BB = MBB.getBasicBlock();
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = MI.getOperand(2).getReg();
+ unsigned CCValid = MI.getOperand(3).getImm();
+ unsigned CCMask = MI.getOperand(4).getImm();
+
+ LivePhysRegs LiveRegs(TII->getRegisterInfo());
+ LiveRegs.addLiveOuts(MBB);
+ for (auto I = std::prev(MBB.end()); I != MBBI; --I)
+ LiveRegs.stepBackward(*I);
+
+ // Splice MBB at MI, moving the rest of the block into RestMBB.
+ MachineBasicBlock *RestMBB = MF.CreateMachineBasicBlock(BB);
+ MF.insert(std::next(MachineFunction::iterator(MBB)), RestMBB);
+ RestMBB->splice(RestMBB->begin(), &MBB, MI, MBB.end());
+ RestMBB->transferSuccessors(&MBB);
+ for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I)
+ RestMBB->addLiveIn(*I);
+
+ // Create a new block MoveMBB to hold the move instruction.
+ MachineBasicBlock *MoveMBB = MF.CreateMachineBasicBlock(BB);
+ MF.insert(std::next(MachineFunction::iterator(MBB)), MoveMBB);
+ MoveMBB->addLiveIn(SrcReg);
+ for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I)
+ MoveMBB->addLiveIn(*I);
+
+ // At the end of MBB, create a conditional branch to RestMBB if the
+ // condition is false, otherwise fall through to MoveMBB.
+ BuildMI(&MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(CCValid).addImm(CCMask ^ CCValid).addMBB(RestMBB);
+ MBB.addSuccessor(RestMBB);
+ MBB.addSuccessor(MoveMBB);
+
+ // In MoveMBB, emit an instruction to move SrcReg into DestReg,
+ // then fall through to RestMBB.
+ TII->copyPhysReg(*MoveMBB, MoveMBB->end(), DL, DestReg, SrcReg,
+ MI.getOperand(2).isKill());
+ MoveMBB->addSuccessor(RestMBB);
+
+ NextMBBI = MBB.end();
+ MI.eraseFromParent();
+ return true;
+}
+
+/// \brief If MBBI references a pseudo instruction that should be expanded here,
+/// do the expansion and return true. Otherwise return false.
+bool SystemZExpandPseudo::expandMI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI) {
+ MachineInstr &MI = *MBBI;
+ switch (MI.getOpcode()) {
+ case SystemZ::LOCRMux:
+ return expandLOCRMux(MBB, MBBI, NextMBBI);
+ default:
+ break;
+ }
+ return false;
+}
+
+/// \brief Iterate over the instructions in basic block MBB and expand any
+/// pseudo instructions. Return true if anything was modified.
+bool SystemZExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ MachineBasicBlock::iterator NMBBI = std::next(MBBI);
+ Modified |= expandMI(MBB, MBBI, NMBBI);
+ MBBI = NMBBI;
+ }
+
+ return Modified;
+}
+
+bool SystemZExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
+ TII = static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+ bool Modified = false;
+ for (auto &MBB : MF)
+ Modified |= expandMBB(MBB);
+ return Modified;
+}
+
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZFeatures.td b/contrib/llvm/lib/Target/SystemZ/SystemZFeatures.td
new file mode 100644
index 000000000000..c5faa0d62881
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZFeatures.td
@@ -0,0 +1,213 @@
+//===-- SystemZFeatures.td - SystemZ processors and features -*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Feature definitions.
+//
+//===----------------------------------------------------------------------===//
+
+class SystemZFeature<string extname, string intname, string desc>
+ : Predicate<"Subtarget->has"##intname##"()">,
+ AssemblerPredicate<"Feature"##intname, extname>,
+ SubtargetFeature<extname, "Has"##intname, "true", desc>;
+
+class SystemZMissingFeature<string intname>
+ : Predicate<"!Subtarget->has"##intname##"()">;
+
+class SystemZFeatureList<list<SystemZFeature> x> {
+ list<SystemZFeature> List = x;
+}
+
+class SystemZFeatureAdd<list<SystemZFeature> x, list<SystemZFeature> y>
+ : SystemZFeatureList<!listconcat(x, y)>;
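+
+// Roughly, each SystemZFeature definition below yields a SubtargetFeature
+// (e.g. "distinct-ops" setting HasDistinctOps), plus matching predicates
+// for instruction selection (Subtarget->hasDistinctOps()) and for the
+// assembler. The SystemZFeatureAdd lists are concatenated at the end of
+// this file to build the cumulative per-architecture feature sets.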
+
+//===----------------------------------------------------------------------===//
+//
+// New features added in the Ninth Edition of the z/Architecture
+//
+//===----------------------------------------------------------------------===//
+
+def FeatureDistinctOps : SystemZFeature<
+ "distinct-ops", "DistinctOps",
+ "Assume that the distinct-operands facility is installed"
+>;
+
+def FeatureFastSerialization : SystemZFeature<
+ "fast-serialization", "FastSerialization",
+ "Assume that the fast-serialization facility is installed"
+>;
+
+def FeatureFPExtension : SystemZFeature<
+ "fp-extension", "FPExtension",
+ "Assume that the floating-point extension facility is installed"
+>;
+
+def FeatureHighWord : SystemZFeature<
+ "high-word", "HighWord",
+ "Assume that the high-word facility is installed"
+>;
+
+def FeatureInterlockedAccess1 : SystemZFeature<
+ "interlocked-access1", "InterlockedAccess1",
+ "Assume that interlocked-access facility 1 is installed"
+>;
+def FeatureNoInterlockedAccess1 : SystemZMissingFeature<"InterlockedAccess1">;
+
+def FeatureLoadStoreOnCond : SystemZFeature<
+ "load-store-on-cond", "LoadStoreOnCond",
+ "Assume that the load/store-on-condition facility is installed"
+>;
+
+def FeaturePopulationCount : SystemZFeature<
+ "population-count", "PopulationCount",
+ "Assume that the population-count facility is installed"
+>;
+
+def FeatureMessageSecurityAssist3 : SystemZFeature<
+ "message-security-assist-extension3", "MessageSecurityAssist3",
+ "Assume that the message-security-assist extension facility 3 is installed"
+>;
+
+def FeatureMessageSecurityAssist4 : SystemZFeature<
+ "message-security-assist-extension4", "MessageSecurityAssist4",
+ "Assume that the message-security-assist extension facility 4 is installed"
+>;
+
+def FeatureResetReferenceBitsMultiple : SystemZFeature<
+ "reset-reference-bits-multiple", "ResetReferenceBitsMultiple",
+ "Assume that the reset-reference-bits-multiple facility is installed"
+>;
+
+def Arch9NewFeatures : SystemZFeatureList<[
+ FeatureDistinctOps,
+ FeatureFastSerialization,
+ FeatureFPExtension,
+ FeatureHighWord,
+ FeatureInterlockedAccess1,
+ FeatureLoadStoreOnCond,
+ FeaturePopulationCount,
+ FeatureMessageSecurityAssist3,
+ FeatureMessageSecurityAssist4,
+ FeatureResetReferenceBitsMultiple
+]>;
+
+//===----------------------------------------------------------------------===//
+//
+// New features added in the Tenth Edition of the z/Architecture
+//
+//===----------------------------------------------------------------------===//
+
+def FeatureExecutionHint : SystemZFeature<
+ "execution-hint", "ExecutionHint",
+ "Assume that the execution-hint facility is installed"
+>;
+
+def FeatureLoadAndTrap : SystemZFeature<
+ "load-and-trap", "LoadAndTrap",
+ "Assume that the load-and-trap facility is installed"
+>;
+
+def FeatureMiscellaneousExtensions : SystemZFeature<
+ "miscellaneous-extensions", "MiscellaneousExtensions",
+ "Assume that the miscellaneous-extensions facility is installed"
+>;
+
+def FeatureProcessorAssist : SystemZFeature<
+ "processor-assist", "ProcessorAssist",
+ "Assume that the processor-assist facility is installed"
+>;
+
+def FeatureTransactionalExecution : SystemZFeature<
+ "transactional-execution", "TransactionalExecution",
+ "Assume that the transactional-execution facility is installed"
+>;
+
+def FeatureDFPZonedConversion : SystemZFeature<
+ "dfp-zoned-conversion", "DFPZonedConversion",
+ "Assume that the DFP zoned-conversion facility is installed"
+>;
+
+def FeatureEnhancedDAT2 : SystemZFeature<
+ "enhanced-dat-2", "EnhancedDAT2",
+ "Assume that the enhanced-DAT facility 2 is installed"
+>;
+
+def Arch10NewFeatures : SystemZFeatureList<[
+ FeatureExecutionHint,
+ FeatureLoadAndTrap,
+ FeatureMiscellaneousExtensions,
+ FeatureProcessorAssist,
+ FeatureTransactionalExecution,
+ FeatureDFPZonedConversion,
+ FeatureEnhancedDAT2
+]>;
+
+//===----------------------------------------------------------------------===//
+//
+// New features added in the Eleventh Edition of the z/Architecture
+//
+//===----------------------------------------------------------------------===//
+
+def FeatureLoadAndZeroRightmostByte : SystemZFeature<
+ "load-and-zero-rightmost-byte", "LoadAndZeroRightmostByte",
+ "Assume that the load-and-zero-rightmost-byte facility is installed"
+>;
+
+def FeatureLoadStoreOnCond2 : SystemZFeature<
+ "load-store-on-cond-2", "LoadStoreOnCond2",
+ "Assume that the load/store-on-condition facility 2 is installed"
+>;
+
+def FeatureMessageSecurityAssist5 : SystemZFeature<
+ "message-security-assist-extension5", "MessageSecurityAssist5",
+ "Assume that the message-security-assist extension facility 5 is installed"
+>;
+
+def FeatureDFPPackedConversion : SystemZFeature<
+ "dfp-packed-conversion", "DFPPackedConversion",
+ "Assume that the DFP packed-conversion facility is installed"
+>;
+
+def FeatureVector : SystemZFeature<
+ "vector", "Vector",
+ "Assume that the vectory facility is installed"
+>;
+def FeatureNoVector : SystemZMissingFeature<"Vector">;
+
+def Arch11NewFeatures : SystemZFeatureList<[
+ FeatureLoadAndZeroRightmostByte,
+ FeatureLoadStoreOnCond2,
+ FeatureMessageSecurityAssist5,
+ FeatureDFPPackedConversion,
+ FeatureVector
+]>;
+
+//===----------------------------------------------------------------------===//
+//
+// Cumulative supported and unsupported feature sets
+//
+//===----------------------------------------------------------------------===//
+
+def Arch8SupportedFeatures
+ : SystemZFeatureList<[]>;
+def Arch9SupportedFeatures
+ : SystemZFeatureAdd<Arch8SupportedFeatures.List, Arch9NewFeatures.List>;
+def Arch10SupportedFeatures
+ : SystemZFeatureAdd<Arch9SupportedFeatures.List, Arch10NewFeatures.List>;
+def Arch11SupportedFeatures
+ : SystemZFeatureAdd<Arch10SupportedFeatures.List, Arch11NewFeatures.List>;
+
+def Arch11UnsupportedFeatures
+ : SystemZFeatureList<[]>;
+def Arch10UnsupportedFeatures
+ : SystemZFeatureAdd<Arch11UnsupportedFeatures.List, Arch11NewFeatures.List>;
+def Arch9UnsupportedFeatures
+ : SystemZFeatureAdd<Arch10UnsupportedFeatures.List, Arch10NewFeatures.List>;
+def Arch8UnsupportedFeatures
+ : SystemZFeatureAdd<Arch9UnsupportedFeatures.List, Arch9NewFeatures.List>;
+
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
new file mode 100644
index 000000000000..0cb2b5a14ce7
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -0,0 +1,562 @@
+//===-- SystemZFrameLowering.cpp - Frame lowering for SystemZ -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZFrameLowering.h"
+#include "SystemZCallingConv.h"
+#include "SystemZInstrBuilder.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZMachineFunctionInfo.h"
+#include "SystemZRegisterInfo.h"
+#include "SystemZSubtarget.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Function.h"
+
+using namespace llvm;
+
+namespace {
+// The ABI-defined register save slots, relative to the incoming stack
+// pointer.
+static const TargetFrameLowering::SpillSlot SpillOffsetTable[] = {
+ { SystemZ::R2D, 0x10 },
+ { SystemZ::R3D, 0x18 },
+ { SystemZ::R4D, 0x20 },
+ { SystemZ::R5D, 0x28 },
+ { SystemZ::R6D, 0x30 },
+ { SystemZ::R7D, 0x38 },
+ { SystemZ::R8D, 0x40 },
+ { SystemZ::R9D, 0x48 },
+ { SystemZ::R10D, 0x50 },
+ { SystemZ::R11D, 0x58 },
+ { SystemZ::R12D, 0x60 },
+ { SystemZ::R13D, 0x68 },
+ { SystemZ::R14D, 0x70 },
+ { SystemZ::R15D, 0x78 },
+ { SystemZ::F0D, 0x80 },
+ { SystemZ::F2D, 0x88 },
+ { SystemZ::F4D, 0x90 },
+ { SystemZ::F6D, 0x98 }
+};
+} // end anonymous namespace
+
+SystemZFrameLowering::SystemZFrameLowering()
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8,
+ -SystemZMC::CallFrameSize, 8,
+ false /* StackRealignable */) {
+ // Create a mapping from register number to save slot offset.
+ RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
+ for (unsigned I = 0, E = array_lengthof(SpillOffsetTable); I != E; ++I)
+ RegSpillOffsets[SpillOffsetTable[I].Reg] = SpillOffsetTable[I].Offset;
+}
+
+const TargetFrameLowering::SpillSlot *
+SystemZFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
+ NumEntries = array_lengthof(SpillOffsetTable);
+ return SpillOffsetTable;
+}
+
+void SystemZFrameLowering::determineCalleeSaves(MachineFunction &MF,
+ BitVector &SavedRegs,
+ RegScavenger *RS) const {
+ TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
+
+ MachineFrameInfo &MFFrame = MF.getFrameInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ bool HasFP = hasFP(MF);
+ SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
+ bool IsVarArg = MF.getFunction()->isVarArg();
+
+ // va_start stores incoming FPR varargs in the normal way, but delegates
+ // the saving of incoming GPR varargs to spillCalleeSavedRegisters().
+ // Record these pending uses, which typically include the call-saved
+ // argument register R6D.
+ if (IsVarArg)
+ for (unsigned I = MFI->getVarArgsFirstGPR(); I < SystemZ::NumArgGPRs; ++I)
+ SavedRegs.set(SystemZ::ArgGPRs[I]);
+
+ // If there are any landing pads, entering them will modify r6/r7.
+ if (!MF.getLandingPads().empty()) {
+ SavedRegs.set(SystemZ::R6D);
+ SavedRegs.set(SystemZ::R7D);
+ }
+
+ // If the function requires a frame pointer, record that the hard
+ // frame pointer will be clobbered.
+ if (HasFP)
+ SavedRegs.set(SystemZ::R11D);
+
+ // If the function calls other functions, record that the return
+ // address register will be clobbered.
+ if (MFFrame.hasCalls())
+ SavedRegs.set(SystemZ::R14D);
+
+ // If we are saving GPRs other than the stack pointer, we might as well
+ // save and restore the stack pointer at the same time, via STMG and LMG.
+ // This allows the deallocation to be done by the LMG, rather than needing
+ // a separate %r15 addition.
+ const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
+ for (unsigned I = 0; CSRegs[I]; ++I) {
+ unsigned Reg = CSRegs[I];
+ if (SystemZ::GR64BitRegClass.contains(Reg) && SavedRegs.test(Reg)) {
+ SavedRegs.set(SystemZ::R15D);
+ break;
+ }
+ }
+}
+
+// Add GPR64 to the save instruction being built by MIB, which is in basic
+// block MBB. IsImplicit says whether this is an explicit operand to the
+// instruction, or an implicit one that comes between the explicit start
+// and end registers.
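+//
+// Schematically, saving %r6-%r15 produces a single
+//   stmg %r6, %r15, 48(%r15)
+// where %r6 and %r15 are explicit operands and %r7-%r14 are added as
+// implicit operands so that their liveness is still modelled.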
+static void addSavedGPR(MachineBasicBlock &MBB, MachineInstrBuilder &MIB,
+ unsigned GPR64, bool IsImplicit) {
+ const TargetRegisterInfo *RI =
+ MBB.getParent()->getSubtarget().getRegisterInfo();
+ unsigned GPR32 = RI->getSubReg(GPR64, SystemZ::subreg_l32);
+ bool IsLive = MBB.isLiveIn(GPR64) || MBB.isLiveIn(GPR32);
+ if (!IsLive || !IsImplicit) {
+ MIB.addReg(GPR64, getImplRegState(IsImplicit) | getKillRegState(!IsLive));
+ if (!IsLive)
+ MBB.addLiveIn(GPR64);
+ }
+}
+
+bool SystemZFrameLowering::
+spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+ bool IsVarArg = MF.getFunction()->isVarArg();
+ DebugLoc DL;
+
+ // Scan the call-saved GPRs and find the bounds of the register spill area.
+ unsigned LowGPR = 0;
+ unsigned HighGPR = SystemZ::R15D;
+ unsigned StartOffset = -1U;
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ unsigned Reg = CSI[I].getReg();
+ if (SystemZ::GR64BitRegClass.contains(Reg)) {
+ unsigned Offset = RegSpillOffsets[Reg];
+ assert(Offset && "Unexpected GPR save");
+ if (StartOffset > Offset) {
+ LowGPR = Reg;
+ StartOffset = Offset;
+ }
+ }
+ }
+
+ // Save the range of call-saved registers, for use by the epilogue inserter.
+ ZFI->setLowSavedGPR(LowGPR);
+ ZFI->setHighSavedGPR(HighGPR);
+
+ // Include the GPR varargs, if any. R6D is call-saved, so would
+ // be included by the loop above, but we also need to handle the
+ // call-clobbered argument registers.
+ if (IsVarArg) {
+ unsigned FirstGPR = ZFI->getVarArgsFirstGPR();
+ if (FirstGPR < SystemZ::NumArgGPRs) {
+ unsigned Reg = SystemZ::ArgGPRs[FirstGPR];
+ unsigned Offset = RegSpillOffsets[Reg];
+ if (StartOffset > Offset) {
+ LowGPR = Reg; StartOffset = Offset;
+ }
+ }
+ }
+
+ // Save GPRs
+ if (LowGPR) {
+ assert(LowGPR != HighGPR && "Should be saving %r15 and something else");
+
+ // Build an STMG instruction.
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::STMG));
+
+ // Add the explicit register operands.
+ addSavedGPR(MBB, MIB, LowGPR, false);
+ addSavedGPR(MBB, MIB, HighGPR, false);
+
+ // Add the address.
+ MIB.addReg(SystemZ::R15D).addImm(StartOffset);
+
+ // Make sure all call-saved GPRs are included as operands and are
+ // marked as live on entry.
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ unsigned Reg = CSI[I].getReg();
+ if (SystemZ::GR64BitRegClass.contains(Reg))
+ addSavedGPR(MBB, MIB, Reg, true);
+ }
+
+ // ...likewise GPR varargs.
+ if (IsVarArg)
+ for (unsigned I = ZFI->getVarArgsFirstGPR(); I < SystemZ::NumArgGPRs; ++I)
+ addSavedGPR(MBB, MIB, SystemZ::ArgGPRs[I], true);
+ }
+
+ // Save FPRs in the normal TargetInstrInfo way.
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ unsigned Reg = CSI[I].getReg();
+ if (SystemZ::FP64BitRegClass.contains(Reg)) {
+ MBB.addLiveIn(Reg);
+ TII->storeRegToStackSlot(MBB, MBBI, Reg, true, CSI[I].getFrameIdx(),
+ &SystemZ::FP64BitRegClass, TRI);
+ }
+ }
+
+ return true;
+}
+
+bool SystemZFrameLowering::
+restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+ bool HasFP = hasFP(MF);
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Restore FPRs in the normal TargetInstrInfo way.
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ unsigned Reg = CSI[I].getReg();
+ if (SystemZ::FP64BitRegClass.contains(Reg))
+ TII->loadRegFromStackSlot(MBB, MBBI, Reg, CSI[I].getFrameIdx(),
+ &SystemZ::FP64BitRegClass, TRI);
+ }
+
+ // Restore call-saved GPRs (but not call-clobbered varargs, which at
+ // this point might hold return values).
+ unsigned LowGPR = ZFI->getLowSavedGPR();
+ unsigned HighGPR = ZFI->getHighSavedGPR();
+ unsigned StartOffset = RegSpillOffsets[LowGPR];
+ if (LowGPR) {
+ // If we saved any of %r2-%r5 as varargs, we should also be saving
+ // and restoring %r6. If we're saving %r6 or above, we should be
+ // restoring it too.
+ assert(LowGPR != HighGPR && "Should be loading %r15 and something else");
+
+ // Build an LMG instruction.
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::LMG));
+
+ // Add the explicit register operands.
+ MIB.addReg(LowGPR, RegState::Define);
+ MIB.addReg(HighGPR, RegState::Define);
+
+ // Add the address.
+ MIB.addReg(HasFP ? SystemZ::R11D : SystemZ::R15D);
+ MIB.addImm(StartOffset);
+
+ // Do a second scan, adding the remaining saved GPRs as registers
+ // implicitly defined by the instruction.
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ unsigned Reg = CSI[I].getReg();
+ if (Reg != LowGPR && Reg != HighGPR &&
+ SystemZ::GR64BitRegClass.contains(Reg))
+ MIB.addReg(Reg, RegState::ImplicitDefine);
+ }
+ }
+
+ return true;
+}
+
+void SystemZFrameLowering::
+processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineFrameInfo &MFFrame = MF.getFrameInfo();
+ // Get the size of our stack frame to be allocated ...
+ uint64_t StackSize = (MFFrame.estimateStackSize(MF) +
+ SystemZMC::CallFrameSize);
+ // ... and the maximum offset we may need to reach into the
+ // caller's frame to access the save area or stack arguments.
+ int64_t MaxArgOffset = SystemZMC::CallFrameSize;
+ for (int I = MFFrame.getObjectIndexBegin(); I != 0; ++I)
+ if (MFFrame.getObjectOffset(I) >= 0) {
+ int64_t ArgOffset = SystemZMC::CallFrameSize +
+ MFFrame.getObjectOffset(I) +
+ MFFrame.getObjectSize(I);
+ MaxArgOffset = std::max(MaxArgOffset, ArgOffset);
+ }
+
+ uint64_t MaxReach = StackSize + MaxArgOffset;
+ if (!isUInt<12>(MaxReach)) {
+ // We may need register scavenging slots if some parts of the frame
+ // are outside the reach of an unsigned 12-bit displacement.
+ // Create 2 for the case where both addresses in an MVC are
+ // out of range.
+ RS->addScavengingFrameIndex(MFFrame.CreateStackObject(8, 8, false));
+ RS->addScavengingFrameIndex(MFFrame.CreateStackObject(8, 8, false));
+ }
+}
+
+// Emit instructions before MBBI (in MBB) to add NumBytes to Reg.
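+// Increments that fit in a signed 16-bit immediate use a single AGHI;
+// larger values are split into one or more AGFI additions, each clamped
+// so that 8-byte stack alignment is preserved. For example, -0x100000
+// needs a single AGFI, while a delta outside the signed 32-bit range
+// takes several.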
+static void emitIncrement(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ const DebugLoc &DL,
+ unsigned Reg, int64_t NumBytes,
+ const TargetInstrInfo *TII) {
+ while (NumBytes) {
+ unsigned Opcode;
+ int64_t ThisVal = NumBytes;
+ if (isInt<16>(NumBytes))
+ Opcode = SystemZ::AGHI;
+ else {
+ Opcode = SystemZ::AGFI;
+ // Make sure we maintain 8-byte stack alignment.
+ int64_t MinVal = -uint64_t(1) << 31;
+ int64_t MaxVal = (int64_t(1) << 31) - 8;
+ if (ThisVal < MinVal)
+ ThisVal = MinVal;
+ else if (ThisVal > MaxVal)
+ ThisVal = MaxVal;
+ }
+ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII->get(Opcode), Reg)
+ .addReg(Reg).addImm(ThisVal);
+ // The CC implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ NumBytes -= ThisVal;
+ }
+}
+
+void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
+ MachineFrameInfo &MFFrame = MF.getFrameInfo();
+ auto *ZII =
+ static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineModuleInfo &MMI = MF.getMMI();
+ const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFFrame.getCalleeSavedInfo();
+ bool HasFP = hasFP(MF);
+
+ // Debug location must be unknown since the first debug location is used
+ // to determine the end of the prologue.
+ DebugLoc DL;
+
+ // The current offset of the stack pointer from the CFA.
+ int64_t SPOffsetFromCFA = -SystemZMC::CFAOffsetFromInitialSP;
+
+ if (ZFI->getLowSavedGPR()) {
+ // Skip over the GPR saves.
+ if (MBBI != MBB.end() && MBBI->getOpcode() == SystemZ::STMG)
+ ++MBBI;
+ else
+ llvm_unreachable("Couldn't skip over GPR saves");
+
+ // Add CFI for the GPR saves.
+ for (auto &Save : CSI) {
+ unsigned Reg = Save.getReg();
+ if (SystemZ::GR64BitRegClass.contains(Reg)) {
+ int64_t Offset = SPOffsetFromCFA + RegSpillOffsets[Reg];
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
+ BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
+ }
+ }
+
+ uint64_t StackSize = getAllocatedStackSize(MF);
+ if (StackSize) {
+ // Determine if we want to store a backchain.
+ bool StoreBackchain = MF.getFunction()->hasFnAttribute("backchain");
+
+ // If we need backchain, save current stack pointer. R1 is free at this
+ // point.
+ if (StoreBackchain)
+ BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::LGR))
+ .addReg(SystemZ::R1D, RegState::Define).addReg(SystemZ::R15D);
+
+ // Allocate StackSize bytes.
+ int64_t Delta = -int64_t(StackSize);
+ emitIncrement(MBB, MBBI, DL, SystemZ::R15D, Delta, ZII);
+
+ // Add CFI for the allocation.
+ unsigned CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::createDefCfaOffset(nullptr, SPOffsetFromCFA + Delta));
+ BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ SPOffsetFromCFA += Delta;
+
+ if (StoreBackchain)
+ BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::STG))
+ .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D).addImm(0).addReg(0);
+ }
+
+ if (HasFP) {
+ // Copy the base of the frame to R11.
+ BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::LGR), SystemZ::R11D)
+ .addReg(SystemZ::R15D);
+
+ // Add CFI for the new frame location.
+ unsigned HardFP = MRI->getDwarfRegNum(SystemZ::R11D, true);
+ unsigned CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::createDefCfaRegister(nullptr, HardFP));
+ BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+
+ // Mark the FramePtr as live at the beginning of every block except
+ // the entry block. (We'll have marked R11 as live on entry when
+ // saving the GPRs.)
+ for (auto I = std::next(MF.begin()), E = MF.end(); I != E; ++I)
+ I->addLiveIn(SystemZ::R11D);
+ }
+
+ // Skip over the FPR saves.
+ SmallVector<unsigned, 8> CFIIndexes;
+ for (auto &Save : CSI) {
+ unsigned Reg = Save.getReg();
+ if (SystemZ::FP64BitRegClass.contains(Reg)) {
+ if (MBBI != MBB.end() &&
+ (MBBI->getOpcode() == SystemZ::STD ||
+ MBBI->getOpcode() == SystemZ::STDY))
+ ++MBBI;
+ else
+ llvm_unreachable("Couldn't skip over FPR save");
+
+ // Add CFI for this save.
+ unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
+ unsigned IgnoredFrameReg;
+ int64_t Offset =
+ getFrameIndexReference(MF, Save.getFrameIdx(), IgnoredFrameReg);
+
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, DwarfReg, SPOffsetFromCFA + Offset));
+ CFIIndexes.push_back(CFIIndex);
+ }
+ }
+ // Complete the CFI for the FPR saves, modelling them as taking effect
+ // after the last save.
+ for (auto CFIIndex : CFIIndexes) {
+ BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
+}
+
+void SystemZFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ auto *ZII =
+ static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+
+ // Skip the return instruction.
+ assert(MBBI->isReturn() && "Can only insert epilogue into returning blocks");
+
+ uint64_t StackSize = getAllocatedStackSize(MF);
+ if (ZFI->getLowSavedGPR()) {
+ --MBBI;
+ unsigned Opcode = MBBI->getOpcode();
+ if (Opcode != SystemZ::LMG)
+ llvm_unreachable("Expected to see callee-save register restore code");
+
+ unsigned AddrOpNo = 2;
+ DebugLoc DL = MBBI->getDebugLoc();
+ uint64_t Offset = StackSize + MBBI->getOperand(AddrOpNo + 1).getImm();
+ unsigned NewOpcode = ZII->getOpcodeForOffset(Opcode, Offset);
+
+ // If the offset is too large, use the largest stack-aligned offset
+ // and add the rest to the base register (the stack or frame pointer).
+ if (!NewOpcode) {
+ uint64_t NumBytes = Offset - 0x7fff8;
+ emitIncrement(MBB, MBBI, DL, MBBI->getOperand(AddrOpNo).getReg(),
+ NumBytes, ZII);
+ Offset -= NumBytes;
+ NewOpcode = ZII->getOpcodeForOffset(Opcode, Offset);
+ assert(NewOpcode && "No restore instruction available");
+ }
+
+ MBBI->setDesc(ZII->get(NewOpcode));
+ MBBI->getOperand(AddrOpNo + 1).ChangeToImmediate(Offset);
+ } else if (StackSize) {
+ DebugLoc DL = MBBI->getDebugLoc();
+ emitIncrement(MBB, MBBI, DL, SystemZ::R15D, StackSize, ZII);
+ }
+}
+
+bool SystemZFrameLowering::hasFP(const MachineFunction &MF) const {
+ return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ MF.getFrameInfo().hasVarSizedObjects() ||
+ MF.getInfo<SystemZMachineFunctionInfo>()->getManipulatesSP());
+}
+
+int SystemZFrameLowering::getFrameIndexReference(const MachineFunction &MF,
+ int FI,
+ unsigned &FrameReg) const {
+ const MachineFrameInfo &MFFrame = MF.getFrameInfo();
+ const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
+
+ // Fill in FrameReg output argument.
+ FrameReg = RI->getFrameRegister(MF);
+
+ // Start with the offset of FI from the top of the caller-allocated frame
+ // (i.e. the top of the 160 bytes allocated by the caller). This initial
+ // offset is therefore negative.
+ int64_t Offset = (MFFrame.getObjectOffset(FI) +
+ MFFrame.getOffsetAdjustment());
+
+ // Make the offset relative to the incoming stack pointer.
+ Offset -= getOffsetOfLocalArea();
+
+ // Make the offset relative to the bottom of the frame.
+ Offset += getAllocatedStackSize(MF);
+
+ return Offset;
+}
+
+uint64_t SystemZFrameLowering::
+getAllocatedStackSize(const MachineFunction &MF) const {
+ const MachineFrameInfo &MFFrame = MF.getFrameInfo();
+
+ // Start with the size of the local variables and spill slots.
+ uint64_t StackSize = MFFrame.getStackSize();
+
+ // We need to allocate the ABI-defined 160-byte base area whenever
+ // we allocate stack space for our own use and whenever we call another
+ // function.
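+ //
+ // For example, a function with 40 bytes of locals that makes a call
+ // allocates 40 + 160 = 200 bytes here.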
+ if (StackSize || MFFrame.hasVarSizedObjects() || MFFrame.hasCalls())
+ StackSize += SystemZMC::CallFrameSize;
+
+ return StackSize;
+}
+
+bool
+SystemZFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ // The ABI requires us to allocate 160 bytes of stack space for the callee,
+ // with any outgoing stack arguments being placed above that. It seems
+ // better to make that area a permanent feature of the frame even if
+ // we're using a frame pointer.
+ return true;
+}
+
+MachineBasicBlock::iterator SystemZFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ switch (MI->getOpcode()) {
+ case SystemZ::ADJCALLSTACKDOWN:
+ case SystemZ::ADJCALLSTACKUP:
+ assert(hasReservedCallFrame(MF) &&
+ "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
+ return MBB.erase(MI);
+
+ default:
+ llvm_unreachable("Unexpected call frame instruction");
+ }
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
new file mode 100644
index 000000000000..d43a176ad874
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -0,0 +1,64 @@
+//===-- SystemZFrameLowering.h - Frame lowering for SystemZ -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZFRAMELOWERING_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZFRAMELOWERING_H
+
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+class SystemZTargetMachine;
+class SystemZSubtarget;
+
+class SystemZFrameLowering : public TargetFrameLowering {
+ IndexedMap<unsigned> RegSpillOffsets;
+
+public:
+ SystemZFrameLowering();
+
+ // Override TargetFrameLowering.
+ bool isFPCloseToIncomingSP() const override { return false; }
+ const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const
+ override;
+ void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
+ RegScavenger *RS) const override;
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const override;
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const
+ override;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS) const override;
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+ bool hasFP(const MachineFunction &MF) const override;
+ int getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const override;
+ bool hasReservedCallFrame(const MachineFunction &MF) const override;
+ MachineBasicBlock::iterator
+ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const override;
+
+ // Return the number of bytes in the callee-allocated part of the frame.
+ uint64_t getAllocatedStackSize(const MachineFunction &MF) const;
+
+ // Return the byte offset from the incoming stack pointer of Reg's
+ // ABI-defined save slot. Return 0 if no slot is defined for Reg.
+ unsigned getRegSpillOffset(unsigned Reg) const {
+ return RegSpillOffsets[Reg];
+ }
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
new file mode 100644
index 000000000000..fe4b52b515e0
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
@@ -0,0 +1,337 @@
+//=-- SystemZHazardRecognizer.cpp - SystemZ Hazard Recognizer ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a hazard recognizer for the SystemZ scheduler.
+//
+// This class is used by the SystemZ scheduling strategy to maintain
+// the state during scheduling, and provide cost functions for
+// scheduling candidates. This includes:
+//
+// * Decoder grouping. A decoder group can hold at most three uops, and
+// instructions that always begin a new group should be scheduled when
+// the current decoder group is empty.
+// * Processor resource usage. It is beneficial to balance the use of
+// resources.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZHazardRecognizer.h"
+#include "llvm/ADT/Statistic.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "misched"
+
+// This is the limit of processor resource usage at which the
+// scheduler should try to look for other instructions (not using the
+// critical resource).
+static cl::opt<int> ProcResCostLim("procres-cost-lim", cl::Hidden,
+ cl::desc("The OOO window for processor "
+ "resources during scheduling."),
+ cl::init(8));
+
+SystemZHazardRecognizer::
+SystemZHazardRecognizer(const MachineSchedContext *C) : DAG(nullptr),
+ SchedModel(nullptr) {}
+
+unsigned SystemZHazardRecognizer::
+getNumDecoderSlots(SUnit *SU) const {
+ const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+ if (!SC->isValid())
+ return 0; // IMPLICIT_DEF / KILL -- has no impact on the output.
+
+ if (SC->BeginGroup) {
+ if (!SC->EndGroup)
+ return 2; // Cracked instruction
+ else
+ return 3; // Expanded/group-alone instruction
+ }
+
+ return 1; // Normal instruction
+}
+
+unsigned SystemZHazardRecognizer::getCurrCycleIdx() {
+ unsigned Idx = CurrGroupSize;
+ if (GrpCount % 2)
+ Idx += 3;
+ return Idx;
+}
+
+ScheduleHazardRecognizer::HazardType SystemZHazardRecognizer::
+getHazardType(SUnit *m, int Stalls) {
+ return (fitsIntoCurrentGroup(m) ? NoHazard : Hazard);
+}
+
+void SystemZHazardRecognizer::Reset() {
+ CurrGroupSize = 0;
+ clearProcResCounters();
+ GrpCount = 0;
+ LastFPdOpCycleIdx = UINT_MAX;
+ DEBUG(CurGroupDbg = "";);
+}
+
+bool
+SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const {
+ const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+ if (!SC->isValid())
+ return true;
+
+ // A cracked instruction only fits if the current decoder group is
+ // empty.
+ if (SC->BeginGroup)
+ return (CurrGroupSize == 0);
+
+ // Since a full group is handled immediately in EmitInstruction(),
+ // SU should fit into current group. NumSlots should be 1 or 0,
+ // since it is not a cracked or expanded instruction.
+ assert ((getNumDecoderSlots(SU) <= 1) && (CurrGroupSize < 3) &&
+ "Expected normal instruction to fit in non-full group!");
+
+ return true;
+}
+
+void SystemZHazardRecognizer::nextGroup(bool DbgOutput) {
+ if (CurrGroupSize > 0) {
+ DEBUG(dumpCurrGroup("Completed decode group"));
+ DEBUG(CurGroupDbg = "";);
+
+ GrpCount++;
+
+ // Reset counter for next group.
+ CurrGroupSize = 0;
+
+ // Decrease counters for execution units by one.
+ for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
+ if (ProcResourceCounters[i] > 0)
+ ProcResourceCounters[i]--;
+
+ // Clear CriticalResourceIdx if it is now below the threshold.
+ if (CriticalResourceIdx != UINT_MAX &&
+ (ProcResourceCounters[CriticalResourceIdx] <=
+ ProcResCostLim))
+ CriticalResourceIdx = UINT_MAX;
+ }
+
+ DEBUG(if (DbgOutput)
+ dumpProcResourceCounters(););
+}
+
+#ifndef NDEBUG // Debug output
+void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const {
+ OS << "SU(" << SU->NodeNum << "):";
+ OS << SchedModel->getInstrInfo()->getName(SU->getInstr()->getOpcode());
+
+ const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+ if (!SC->isValid())
+ return;
+
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel->getWriteProcResBegin(SC),
+ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+ const MCProcResourceDesc &PRD =
+ *SchedModel->getProcResource(PI->ProcResourceIdx);
+ std::string FU(PRD.Name);
+ // trim e.g. Z13_FXaUnit -> FXa
+ FU = FU.substr(FU.find("_") + 1);
+ FU.resize(FU.find("Unit"));
+ OS << "/" << FU;
+
+ if (PI->Cycles > 1)
+ OS << "(" << PI->Cycles << "cyc)";
+ }
+
+ if (SC->NumMicroOps > 1)
+ OS << "/" << SC->NumMicroOps << "uops";
+ if (SC->BeginGroup && SC->EndGroup)
+ OS << "/GroupsAlone";
+ else if (SC->BeginGroup)
+ OS << "/BeginsGroup";
+ else if (SC->EndGroup)
+ OS << "/EndsGroup";
+ if (SU->isUnbuffered)
+ OS << "/Unbuffered";
+}
+
+void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const {
+ dbgs() << "+++ " << Msg;
+ dbgs() << ": ";
+
+ if (CurGroupDbg.empty())
+ dbgs() << " <empty>\n";
+ else {
+ dbgs() << "{ " << CurGroupDbg << " }";
+ dbgs() << " (" << CurrGroupSize << " decoder slot"
+ << (CurrGroupSize > 1 ? "s":"")
+ << ")\n";
+ }
+}
+
+void SystemZHazardRecognizer::dumpProcResourceCounters() const {
+ bool any = false;
+
+ for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
+ if (ProcResourceCounters[i] > 0) {
+ any = true;
+ break;
+ }
+
+ if (!any)
+ return;
+
+ dbgs() << "+++ Resource counters:\n";
+ for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
+ if (ProcResourceCounters[i] > 0) {
+ dbgs() << "+++ Extra schedule for execution unit "
+ << SchedModel->getProcResource(i)->Name
+ << ": " << ProcResourceCounters[i] << "\n";
+ any = true;
+ }
+}
+#endif //NDEBUG
+
+void SystemZHazardRecognizer::clearProcResCounters() {
+ ProcResourceCounters.assign(SchedModel->getNumProcResourceKinds(), 0);
+ CriticalResourceIdx = UINT_MAX;
+}
+
+// Update state with SU as the next scheduled unit.
+void SystemZHazardRecognizer::
+EmitInstruction(SUnit *SU) {
+ const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+ DEBUG( dumpCurrGroup("Decode group before emission"););
+
+ // If scheduling an SU that must begin a new decoder group, move on
+ // to next group.
+ if (!fitsIntoCurrentGroup(SU))
+ nextGroup();
+
+ DEBUG( dbgs() << "+++ HazardRecognizer emitting "; dumpSU(SU, dbgs());
+ dbgs() << "\n";
+ raw_string_ostream cgd(CurGroupDbg);
+ if (CurGroupDbg.length())
+ cgd << ", ";
+ dumpSU(SU, cgd););
+
+ // After returning from a call, we don't know much about the state.
+ if (SU->getInstr()->isCall()) {
+ DEBUG (dbgs() << "+++ Clearing state after call.\n";);
+ clearProcResCounters();
+ LastFPdOpCycleIdx = UINT_MAX;
+ CurrGroupSize += getNumDecoderSlots(SU);
+ assert (CurrGroupSize <= 3);
+ nextGroup();
+ return;
+ }
+
+ // Increase counter for execution unit(s).
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel->getWriteProcResBegin(SC),
+ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+ // Don't handle FPd together with the other resources.
+ if (SchedModel->getProcResource(PI->ProcResourceIdx)->BufferSize == 1)
+ continue;
+ int &CurrCounter =
+ ProcResourceCounters[PI->ProcResourceIdx];
+ CurrCounter += PI->Cycles;
+ // Check if this is now the new critical resource.
+ if ((CurrCounter > ProcResCostLim) &&
+ (CriticalResourceIdx == UINT_MAX ||
+ (PI->ProcResourceIdx != CriticalResourceIdx &&
+ CurrCounter >
+ ProcResourceCounters[CriticalResourceIdx]))) {
+ DEBUG( dbgs() << "+++ New critical resource: "
+ << SchedModel->getProcResource(PI->ProcResourceIdx)->Name
+ << "\n";);
+ CriticalResourceIdx = PI->ProcResourceIdx;
+ }
+ }
+
+ // Make note of an instruction that uses a blocking resource (FPd).
+ if (SU->isUnbuffered) {
+ LastFPdOpCycleIdx = getCurrCycleIdx();
+ DEBUG (dbgs() << "+++ Last FPd cycle index: "
+ << LastFPdOpCycleIdx << "\n";);
+ }
+
+ // Insert SU into current group by increasing number of slots used
+ // in current group.
+ CurrGroupSize += getNumDecoderSlots(SU);
+ assert (CurrGroupSize <= 3);
+
+ // Check if current group is now full/ended. If so, move on to next
+ // group to be ready to evaluate more candidates.
+ if (CurrGroupSize == 3 || SC->EndGroup)
+ nextGroup();
+}
+
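+// Return a grouping cost for scheduling SU next. For example, a
+// group-starting SU scheduled while the current group already holds two
+// uops wastes one slot (cost 1), whereas scheduling it into an empty
+// group is a good fit (cost -1).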
+int SystemZHazardRecognizer::groupingCost(SUnit *SU) const {
+ const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+ if (!SC->isValid())
+ return 0;
+
+ // If SU begins new group, it can either break a current group early
+ // or fit naturally if current group is empty (negative cost).
+ if (SC->BeginGroup) {
+ if (CurrGroupSize)
+ return 3 - CurrGroupSize;
+ return -1;
+ }
+
+ // Similarly, a group-ending SU may either fit well (last in group), or
+ // end the group prematurely.
+ if (SC->EndGroup) {
+ unsigned resultingGroupSize =
+ (CurrGroupSize + getNumDecoderSlots(SU));
+ if (resultingGroupSize < 3)
+ return (3 - resultingGroupSize);
+ return -1;
+ }
+
+ // Most instructions can be placed in any decoder slot.
+ return 0;
+}
+
+bool SystemZHazardRecognizer::isFPdOpPreferred_distance(const SUnit *SU) {
+ assert (SU->isUnbuffered);
+ // If this is the first FPd op, it should be scheduled high.
+ if (LastFPdOpCycleIdx == UINT_MAX)
+ return true;
+ // If this is not the first FPd op, it should go into the other side
+ // of the processor to use the other FPd unit there. This should
+ // generally happen if two FPd ops are placed with 2 other
+ // instructions between them (modulo 6).
+ if (LastFPdOpCycleIdx > getCurrCycleIdx())
+ return ((LastFPdOpCycleIdx - getCurrCycleIdx()) == 3);
+ return ((getCurrCycleIdx() - LastFPdOpCycleIdx) == 3);
+}
+
+int SystemZHazardRecognizer::
+resourcesCost(SUnit *SU) {
+ int Cost = 0;
+
+ const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+ if (!SC->isValid())
+ return 0;
+
+ // For a FPd op, either return min or max value as indicated by the
+ // distance to any prior FPd op.
+ if (SU->isUnbuffered)
+ Cost = (isFPdOpPreferred_distance(SU) ? INT_MIN : INT_MAX);
+ // For other instructions, give a cost to the use of the critical resource.
+ else if (CriticalResourceIdx != UINT_MAX) {
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel->getWriteProcResBegin(SC),
+ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI)
+ if (PI->ProcResourceIdx == CriticalResourceIdx)
+ Cost = PI->Cycles;
+ }
+
+ return Cost;
+}
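+
+// As a small worked example: a non-FPd SU that uses 2 cycles of the
+// current critical resource gets Cost == 2, while an FPd op always
+// returns either INT_MIN (schedule it now) or INT_MAX (wait).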
+
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h b/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h
new file mode 100644
index 000000000000..0c755c9ad1b9
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h
@@ -0,0 +1,128 @@
+//=-- SystemZHazardRecognizer.h - SystemZ Hazard Recognizer -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares a hazard recognizer for the SystemZ scheduler.
+//
+// This class is used by the SystemZ scheduling strategy to maintain
+// the state during scheduling, and provide cost functions for
+// scheduling candidates. This includes:
+//
+// * Decoder grouping. A decoder group can hold at most 3 uops, and
+// instructions that always begin a new group should be scheduled when
+// the current decoder group is empty.
+// * Processor resource usage. It is beneficial to balance the use of
+// resources.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZHAZARDRECOGNIZER_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZHAZARDRECOGNIZER_H
+
+#include "SystemZSubtarget.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/raw_ostream.h"
+#include <string>
+
+namespace llvm {
+
+/// SystemZHazardRecognizer maintains the state during scheduling.
+class SystemZHazardRecognizer : public ScheduleHazardRecognizer {
+
+ ScheduleDAGMI *DAG;
+ const TargetSchedModel *SchedModel;
+
+ /// Keep track of the number of decoder slots used in the current
+ /// decoder group.
+ unsigned CurrGroupSize;
+
+  /// The tracking of resources here is quite similar to the common
+  /// code use of a critical resource. However, z13 differs in that it
+  /// has two processor sides, which may be interesting to model in the
+  /// future (a work in progress).
+
+ /// Counters for the number of uops scheduled per processor
+ /// resource.
+ SmallVector<int, 0> ProcResourceCounters;
+
+  /// The index of the resource with the greatest number of uops
+  /// scheduled so far, which the scheduler tries to avoid.
+ unsigned CriticalResourceIdx;
+
+  /// Return the number of decoder slots SU requires.
+ inline unsigned getNumDecoderSlots(SUnit *SU) const;
+
+  /// Return true if SU fits into the current decoder group.
+ bool fitsIntoCurrentGroup(SUnit *SU) const;
+
+ /// Two decoder groups per cycle are formed (for z13), meaning 2x3
+ /// instructions. This function returns a number between 0 and 5,
+ /// representing the current decoder slot of the current cycle.
+ unsigned getCurrCycleIdx();
+
+  /// LastFPdOpCycleIdx stores the number returned by getCurrCycleIdx()
+  /// when a stalling operation is scheduled (which uses the FPd resource).
+ unsigned LastFPdOpCycleIdx;
+
+ /// A counter of decoder groups scheduled.
+ unsigned GrpCount;
+
+  unsigned getCurrGroupSize() { return CurrGroupSize; }
+
+ /// Start next decoder group.
+ void nextGroup(bool DbgOutput = true);
+
+ /// Clear all counters for processor resources.
+ void clearProcResCounters();
+
+ /// With the goal of alternating processor sides for stalling (FPd)
+ /// ops, return true if it seems good to schedule an FPd op next.
+ bool isFPdOpPreferred_distance(const SUnit *SU);
+
+public:
+ SystemZHazardRecognizer(const MachineSchedContext *C);
+
+ void setDAG(ScheduleDAGMI *dag) {
+ DAG = dag;
+ SchedModel = dag->getSchedModel();
+ }
+
+ HazardType getHazardType(SUnit *m, int Stalls = 0) override;
+ void Reset() override;
+ void EmitInstruction(SUnit *SU) override;
+
+ // Cost functions used by SystemZPostRASchedStrategy while
+ // evaluating candidates.
+
+  /// Return the cost of decoder grouping for SU. If SU must begin or
+  /// end a decoder group, this is negative if that fits the current
+  /// schedule, or positive if it would mean ending a group prematurely.
+  /// For normal instructions this returns 0.
+ int groupingCost(SUnit *SU) const;
+
+ /// Return the cost of SU in regards to processor resources usage.
+ /// A positive value means it would be better to wait with SU, while
+ /// a negative value means it would be good to schedule SU next.
+ int resourcesCost(SUnit *SU);
+
+#ifndef NDEBUG
+ // Debug dumping.
+ std::string CurGroupDbg; // current group as text
+ void dumpSU(SUnit *SU, raw_ostream &OS) const;
+ void dumpCurrGroup(std::string Msg = "") const;
+ void dumpProcResourceCounters() const;
+#endif
+};
+
+} // namespace llvm
+
+#endif /* LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZHAZARDRECOGNIZER_H */
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
new file mode 100644
index 000000000000..cd2f708458bf
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -0,0 +1,1420 @@
+//===-- SystemZISelDAGToDAG.cpp - A dag to dag inst selector for SystemZ --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the SystemZ target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZTargetMachine.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "systemz-isel"
+
+namespace {
+// Used to build addressing modes.
+struct SystemZAddressingMode {
+ // The shape of the address.
+ enum AddrForm {
+ // base+displacement
+ FormBD,
+
+ // base+displacement+index for load and store operands
+ FormBDXNormal,
+
+ // base+displacement+index for load address operands
+ FormBDXLA,
+
+ // base+displacement+index+ADJDYNALLOC
+ FormBDXDynAlloc
+ };
+ AddrForm Form;
+
+ // The type of displacement. The enum names here correspond directly
+ // to the definitions in SystemZOperand.td. We could split them into
+ // flags -- single/pair, 128-bit, etc. -- but it hardly seems worth it.
+ enum DispRange {
+ Disp12Only,
+ Disp12Pair,
+ Disp20Only,
+ Disp20Only128,
+ Disp20Pair
+ };
+ DispRange DR;
+
+ // The parts of the address. The address is equivalent to:
+ //
+ // Base + Disp + Index + (IncludesDynAlloc ? ADJDYNALLOC : 0)
+ SDValue Base;
+ int64_t Disp;
+ SDValue Index;
+ bool IncludesDynAlloc;
+
+ SystemZAddressingMode(AddrForm form, DispRange dr)
+ : Form(form), DR(dr), Base(), Disp(0), Index(),
+ IncludesDynAlloc(false) {}
+
+ // True if the address can have an index register.
+ bool hasIndexField() { return Form != FormBD; }
+
+ // True if the address can (and must) include ADJDYNALLOC.
+ bool isDynAlloc() { return Form == FormBDXDynAlloc; }
+
+ void dump() {
+ errs() << "SystemZAddressingMode " << this << '\n';
+
+ errs() << " Base ";
+ if (Base.getNode())
+ Base.getNode()->dump();
+ else
+ errs() << "null\n";
+
+ if (hasIndexField()) {
+ errs() << " Index ";
+ if (Index.getNode())
+ Index.getNode()->dump();
+ else
+ errs() << "null\n";
+ }
+
+ errs() << " Disp " << Disp;
+ if (IncludesDynAlloc)
+ errs() << " + ADJDYNALLOC";
+ errs() << '\n';
+ }
+};
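+
+// For example, a FormBDXNormal address written as the assembler operand
+// 8(%r3,%r2) (SystemZ syntax is D(X,B)) corresponds to Base = %r2,
+// Index = %r3 and Disp = 8 here; the register names are illustrative.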
+
+// Return a mask with Count low bits set.
+static uint64_t allOnes(unsigned int Count) {
+ assert(Count <= 64);
+ if (Count > 63)
+ return UINT64_MAX;
+ return (uint64_t(1) << Count) - 1;
+}
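+
+// A few sample values of allOnes(), as a sanity check:
+//   allOnes(0)  == 0x0
+//   allOnes(12) == 0xFFF
+//   allOnes(63) == 0x7FFFFFFFFFFFFFFF
+//   allOnes(64) == UINT64_MAX, via the Count > 63 special case (a plain
+//   64-bit shift by 64 would be undefined behavior).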
+
+// Represents operands 2 to 5 of the ROTATE AND ... SELECTED BITS operation
+// given by Opcode. The operands are: Input (R2), Start (I3), End (I4) and
+// Rotate (I5). The combined operand value is effectively:
+//
+// (or (rotl Input, Rotate), ~Mask)
+//
+// for RNSBG and:
+//
+// (and (rotl Input, Rotate), Mask)
+//
+// otherwise. The output value has BitSize bits, although Input may be
+// narrower (in which case the upper bits are don't care), or wider (in which
+// case the result will be truncated as part of the operation).
+struct RxSBGOperands {
+ RxSBGOperands(unsigned Op, SDValue N)
+ : Opcode(Op), BitSize(N.getValueSizeInBits()),
+ Mask(allOnes(BitSize)), Input(N), Start(64 - BitSize), End(63),
+ Rotate(0) {}
+
+ unsigned Opcode;
+ unsigned BitSize;
+ uint64_t Mask;
+ SDValue Input;
+ unsigned Start;
+ unsigned End;
+ unsigned Rotate;
+};
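+
+// For a freshly constructed RxSBGOperands on a 32-bit input, the initial
+// state is Mask == 0xFFFFFFFF, Start == 32, End == 63 and Rotate == 0,
+// i.e. the selected bits are the low 32 bits of the 64-bit rotated input
+// (Start/End use big-endian bit numbering, where bit 0 is the MSB).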
+
+class SystemZDAGToDAGISel : public SelectionDAGISel {
+ const SystemZSubtarget *Subtarget;
+
+ // Used by SystemZOperands.td to create integer constants.
+ inline SDValue getImm(const SDNode *Node, uint64_t Imm) const {
+ return CurDAG->getTargetConstant(Imm, SDLoc(Node), Node->getValueType(0));
+ }
+
+ const SystemZTargetMachine &getTargetMachine() const {
+ return static_cast<const SystemZTargetMachine &>(TM);
+ }
+
+ const SystemZInstrInfo *getInstrInfo() const {
+ return Subtarget->getInstrInfo();
+ }
+
+ // Try to fold more of the base or index of AM into AM, where IsBase
+ // selects between the base and index.
+ bool expandAddress(SystemZAddressingMode &AM, bool IsBase) const;
+
+ // Try to describe N in AM, returning true on success.
+ bool selectAddress(SDValue N, SystemZAddressingMode &AM) const;
+
+ // Extract individual target operands from matched address AM.
+ void getAddressOperands(const SystemZAddressingMode &AM, EVT VT,
+ SDValue &Base, SDValue &Disp) const;
+ void getAddressOperands(const SystemZAddressingMode &AM, EVT VT,
+ SDValue &Base, SDValue &Disp, SDValue &Index) const;
+
+ // Try to match Addr as a FormBD address with displacement type DR.
+ // Return true on success, storing the base and displacement in
+ // Base and Disp respectively.
+ bool selectBDAddr(SystemZAddressingMode::DispRange DR, SDValue Addr,
+ SDValue &Base, SDValue &Disp) const;
+
+ // Try to match Addr as a FormBDX address with displacement type DR.
+ // Return true on success and if the result had no index. Store the
+ // base and displacement in Base and Disp respectively.
+ bool selectMVIAddr(SystemZAddressingMode::DispRange DR, SDValue Addr,
+ SDValue &Base, SDValue &Disp) const;
+
+ // Try to match Addr as a FormBDX* address of form Form with
+ // displacement type DR. Return true on success, storing the base,
+ // displacement and index in Base, Disp and Index respectively.
+ bool selectBDXAddr(SystemZAddressingMode::AddrForm Form,
+ SystemZAddressingMode::DispRange DR, SDValue Addr,
+ SDValue &Base, SDValue &Disp, SDValue &Index) const;
+
+ // PC-relative address matching routines used by SystemZOperands.td.
+ bool selectPCRelAddress(SDValue Addr, SDValue &Target) const {
+ if (SystemZISD::isPCREL(Addr.getOpcode())) {
+ Target = Addr.getOperand(0);
+ return true;
+ }
+ return false;
+ }
+
+ // BD matching routines used by SystemZOperands.td.
+ bool selectBDAddr12Only(SDValue Addr, SDValue &Base, SDValue &Disp) const {
+ return selectBDAddr(SystemZAddressingMode::Disp12Only, Addr, Base, Disp);
+ }
+ bool selectBDAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const {
+ return selectBDAddr(SystemZAddressingMode::Disp12Pair, Addr, Base, Disp);
+ }
+ bool selectBDAddr20Only(SDValue Addr, SDValue &Base, SDValue &Disp) const {
+ return selectBDAddr(SystemZAddressingMode::Disp20Only, Addr, Base, Disp);
+ }
+ bool selectBDAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const {
+ return selectBDAddr(SystemZAddressingMode::Disp20Pair, Addr, Base, Disp);
+ }
+
+ // MVI matching routines used by SystemZOperands.td.
+ bool selectMVIAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const {
+ return selectMVIAddr(SystemZAddressingMode::Disp12Pair, Addr, Base, Disp);
+ }
+ bool selectMVIAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const {
+ return selectMVIAddr(SystemZAddressingMode::Disp20Pair, Addr, Base, Disp);
+ }
+
+ // BDX matching routines used by SystemZOperands.td.
+ bool selectBDXAddr12Only(SDValue Addr, SDValue &Base, SDValue &Disp,
+ SDValue &Index) const {
+ return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
+ SystemZAddressingMode::Disp12Only,
+ Addr, Base, Disp, Index);
+ }
+ bool selectBDXAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp,
+ SDValue &Index) const {
+ return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
+ SystemZAddressingMode::Disp12Pair,
+ Addr, Base, Disp, Index);
+ }
+ bool selectDynAlloc12Only(SDValue Addr, SDValue &Base, SDValue &Disp,
+ SDValue &Index) const {
+ return selectBDXAddr(SystemZAddressingMode::FormBDXDynAlloc,
+ SystemZAddressingMode::Disp12Only,
+ Addr, Base, Disp, Index);
+ }
+ bool selectBDXAddr20Only(SDValue Addr, SDValue &Base, SDValue &Disp,
+ SDValue &Index) const {
+ return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
+ SystemZAddressingMode::Disp20Only,
+ Addr, Base, Disp, Index);
+ }
+ bool selectBDXAddr20Only128(SDValue Addr, SDValue &Base, SDValue &Disp,
+ SDValue &Index) const {
+ return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
+ SystemZAddressingMode::Disp20Only128,
+ Addr, Base, Disp, Index);
+ }
+ bool selectBDXAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp,
+ SDValue &Index) const {
+ return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
+ SystemZAddressingMode::Disp20Pair,
+ Addr, Base, Disp, Index);
+ }
+ bool selectLAAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp,
+ SDValue &Index) const {
+ return selectBDXAddr(SystemZAddressingMode::FormBDXLA,
+ SystemZAddressingMode::Disp12Pair,
+ Addr, Base, Disp, Index);
+ }
+ bool selectLAAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp,
+ SDValue &Index) const {
+ return selectBDXAddr(SystemZAddressingMode::FormBDXLA,
+ SystemZAddressingMode::Disp20Pair,
+ Addr, Base, Disp, Index);
+ }
+
+ // Try to match Addr as an address with a base, 12-bit displacement
+ // and index, where the index is element Elem of a vector.
+ // Return true on success, storing the base, displacement and vector
+ // in Base, Disp and Index respectively.
+ bool selectBDVAddr12Only(SDValue Addr, SDValue Elem, SDValue &Base,
+ SDValue &Disp, SDValue &Index) const;
+
+ // Check whether (or Op (and X InsertMask)) is effectively an insertion
+ // of X into bits InsertMask of some Y != Op. Return true if so and
+ // set Op to that Y.
+ bool detectOrAndInsertion(SDValue &Op, uint64_t InsertMask) const;
+
+ // Try to update RxSBG so that only the bits of RxSBG.Input in Mask are used.
+ // Return true on success.
+ bool refineRxSBGMask(RxSBGOperands &RxSBG, uint64_t Mask) const;
+
+ // Try to fold some of RxSBG.Input into other fields of RxSBG.
+ // Return true on success.
+ bool expandRxSBG(RxSBGOperands &RxSBG) const;
+
+ // Return an undefined value of type VT.
+ SDValue getUNDEF(const SDLoc &DL, EVT VT) const;
+
+ // Convert N to VT, if it isn't already.
+ SDValue convertTo(const SDLoc &DL, EVT VT, SDValue N) const;
+
+ // Try to implement AND or shift node N using RISBG with the zero flag set.
+  // Return true on success.
+ bool tryRISBGZero(SDNode *N);
+
+ // Try to use RISBG or Opcode to implement OR or XOR node N.
+  // Return true on success.
+ bool tryRxSBG(SDNode *N, unsigned Opcode);
+
+ // If Op0 is null, then Node is a constant that can be loaded using:
+ //
+ // (Opcode UpperVal LowerVal)
+ //
+ // If Op0 is nonnull, then Node can be implemented using:
+ //
+ // (Opcode (Opcode Op0 UpperVal) LowerVal)
+ void splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0,
+ uint64_t UpperVal, uint64_t LowerVal);
+
+ // Try to use gather instruction Opcode to implement vector insertion N.
+ bool tryGather(SDNode *N, unsigned Opcode);
+
+ // Try to use scatter instruction Opcode to implement store Store.
+ bool tryScatter(StoreSDNode *Store, unsigned Opcode);
+
+ // Return true if Load and Store are loads and stores of the same size
+ // and are guaranteed not to overlap. Such operations can be implemented
+ // using block (SS-format) instructions.
+ //
+ // Partial overlap would lead to incorrect code, since the block operations
+ // are logically bytewise, even though they have a fast path for the
+ // non-overlapping case. We also need to avoid full overlap (i.e. two
+ // addresses that might be equal at run time) because although that case
+ // would be handled correctly, it might be implemented by millicode.
+ bool canUseBlockOperation(StoreSDNode *Store, LoadSDNode *Load) const;
+
+ // N is a (store (load Y), X) pattern. Return true if it can use an MVC
+ // from Y to X.
+ bool storeLoadCanUseMVC(SDNode *N) const;
+
+ // N is a (store (op (load A[0]), (load A[1])), X) pattern. Return true
+ // if A[1 - I] == X and if N can use a block operation like NC from A[I]
+ // to X.
+ bool storeLoadCanUseBlockBinary(SDNode *N, unsigned I) const;
+
+public:
+ SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(TM, OptLevel) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ Subtarget = &MF.getSubtarget<SystemZSubtarget>();
+ return SelectionDAGISel::runOnMachineFunction(MF);
+ }
+
+ // Override MachineFunctionPass.
+ StringRef getPassName() const override {
+ return "SystemZ DAG->DAG Pattern Instruction Selection";
+ }
+
+ // Override SelectionDAGISel.
+ void Select(SDNode *Node) override;
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
+ std::vector<SDValue> &OutOps) override;
+
+ // Include the pieces autogenerated from the target description.
+ #include "SystemZGenDAGISel.inc"
+};
+} // end anonymous namespace
+
+FunctionPass *llvm::createSystemZISelDag(SystemZTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new SystemZDAGToDAGISel(TM, OptLevel);
+}
+
+// Return true if Val should be selected as a displacement for an address
+// with range DR. Here we're interested in the range of both the instruction
+// described by DR and of any pairing instruction.
+static bool selectDisp(SystemZAddressingMode::DispRange DR, int64_t Val) {
+ switch (DR) {
+ case SystemZAddressingMode::Disp12Only:
+ return isUInt<12>(Val);
+
+ case SystemZAddressingMode::Disp12Pair:
+ case SystemZAddressingMode::Disp20Only:
+ case SystemZAddressingMode::Disp20Pair:
+ return isInt<20>(Val);
+
+ case SystemZAddressingMode::Disp20Only128:
+ return isInt<20>(Val) && isInt<20>(Val + 8);
+ }
+ llvm_unreachable("Unhandled displacement range");
+}
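+
+// Spelled out, the ranges accepted above are:
+//   Disp12Only:                         0 .. 4095
+//   Disp12Pair, Disp20Only, Disp20Pair: -524288 .. 524287
+//   Disp20Only128: as Disp20Only, but Val + 8 must also fit, so that
+//   both halves of a 128-bit access are addressable.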
+
+// Change the base or index in AM to Value, where IsBase selects
+// between the base and index.
+static void changeComponent(SystemZAddressingMode &AM, bool IsBase,
+ SDValue Value) {
+ if (IsBase)
+ AM.Base = Value;
+ else
+ AM.Index = Value;
+}
+
+// The base or index of AM is equivalent to Value + ADJDYNALLOC,
+// where IsBase selects between the base and index. Try to fold the
+// ADJDYNALLOC into AM.
+static bool expandAdjDynAlloc(SystemZAddressingMode &AM, bool IsBase,
+ SDValue Value) {
+ if (AM.isDynAlloc() && !AM.IncludesDynAlloc) {
+ changeComponent(AM, IsBase, Value);
+ AM.IncludesDynAlloc = true;
+ return true;
+ }
+ return false;
+}
+
+// The base of AM is equivalent to Base + Index. Try to use Index as
+// the index register.
+static bool expandIndex(SystemZAddressingMode &AM, SDValue Base,
+ SDValue Index) {
+ if (AM.hasIndexField() && !AM.Index.getNode()) {
+ AM.Base = Base;
+ AM.Index = Index;
+ return true;
+ }
+ return false;
+}
+
+// The base or index of AM is equivalent to Op0 + Op1, where IsBase selects
+// between the base and index. Try to fold Op1 into AM's displacement.
+static bool expandDisp(SystemZAddressingMode &AM, bool IsBase,
+ SDValue Op0, uint64_t Op1) {
+ // First try adjusting the displacement.
+ int64_t TestDisp = AM.Disp + Op1;
+ if (selectDisp(AM.DR, TestDisp)) {
+ changeComponent(AM, IsBase, Op0);
+ AM.Disp = TestDisp;
+ return true;
+ }
+
+ // We could consider forcing the displacement into a register and
+ // using it as an index, but it would need to be carefully tuned.
+ return false;
+}
+
+bool SystemZDAGToDAGISel::expandAddress(SystemZAddressingMode &AM,
+ bool IsBase) const {
+ SDValue N = IsBase ? AM.Base : AM.Index;
+ unsigned Opcode = N.getOpcode();
+ if (Opcode == ISD::TRUNCATE) {
+ N = N.getOperand(0);
+ Opcode = N.getOpcode();
+ }
+ if (Opcode == ISD::ADD || CurDAG->isBaseWithConstantOffset(N)) {
+ SDValue Op0 = N.getOperand(0);
+ SDValue Op1 = N.getOperand(1);
+
+ unsigned Op0Code = Op0->getOpcode();
+ unsigned Op1Code = Op1->getOpcode();
+
+ if (Op0Code == SystemZISD::ADJDYNALLOC)
+ return expandAdjDynAlloc(AM, IsBase, Op1);
+ if (Op1Code == SystemZISD::ADJDYNALLOC)
+ return expandAdjDynAlloc(AM, IsBase, Op0);
+
+ if (Op0Code == ISD::Constant)
+ return expandDisp(AM, IsBase, Op1,
+ cast<ConstantSDNode>(Op0)->getSExtValue());
+ if (Op1Code == ISD::Constant)
+ return expandDisp(AM, IsBase, Op0,
+ cast<ConstantSDNode>(Op1)->getSExtValue());
+
+ if (IsBase && expandIndex(AM, Op0, Op1))
+ return true;
+ }
+ if (Opcode == SystemZISD::PCREL_OFFSET) {
+ SDValue Full = N.getOperand(0);
+ SDValue Base = N.getOperand(1);
+ SDValue Anchor = Base.getOperand(0);
+ uint64_t Offset = (cast<GlobalAddressSDNode>(Full)->getOffset() -
+ cast<GlobalAddressSDNode>(Anchor)->getOffset());
+ return expandDisp(AM, IsBase, Base, Offset);
+ }
+ return false;
+}
+
+// Return true if an instruction with displacement range DR should be
+// used for displacement value Val. selectDisp(DR, Val) must already hold.
+static bool isValidDisp(SystemZAddressingMode::DispRange DR, int64_t Val) {
+ assert(selectDisp(DR, Val) && "Invalid displacement");
+ switch (DR) {
+ case SystemZAddressingMode::Disp12Only:
+ case SystemZAddressingMode::Disp20Only:
+ case SystemZAddressingMode::Disp20Only128:
+ return true;
+
+ case SystemZAddressingMode::Disp12Pair:
+ // Use the other instruction if the displacement is too large.
+ return isUInt<12>(Val);
+
+ case SystemZAddressingMode::Disp20Pair:
+ // Use the other instruction if the displacement is small enough.
+ return !isUInt<12>(Val);
+ }
+ llvm_unreachable("Unhandled displacement range");
+}
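+
+// Example of the pairing logic: for Disp12Pair, a displacement of 4000
+// keeps the short form (isUInt<12> holds) while 5000 defers to the
+// long-displacement partner; Disp20Pair is the mirror image. This is
+// what lets pairs such as L/LY share a single matcher.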
+
+// Return true if Base + Disp + Index should be performed by LA(Y).
+static bool shouldUseLA(SDNode *Base, int64_t Disp, SDNode *Index) {
+ // Don't use LA(Y) for constants.
+ if (!Base)
+ return false;
+
+ // Always use LA(Y) for frame addresses, since we know that the destination
+ // register is almost always (perhaps always) going to be different from
+ // the frame register.
+ if (Base->getOpcode() == ISD::FrameIndex)
+ return true;
+
+ if (Disp) {
+ // Always use LA(Y) if there is a base, displacement and index.
+ if (Index)
+ return true;
+
+ // Always use LA if the displacement is small enough. It should always
+ // be no worse than AGHI (and better if it avoids a move).
+ if (isUInt<12>(Disp))
+ return true;
+
+ // For similar reasons, always use LAY if the constant is too big for AGHI.
+ // LAY should be no worse than AGFI.
+ if (!isInt<16>(Disp))
+ return true;
+ } else {
+ // Don't use LA for plain registers.
+ if (!Index)
+ return false;
+
+ // Don't use LA for plain addition if the index operand is only used
+ // once. It should be a natural two-operand addition in that case.
+ if (Index->hasOneUse())
+ return false;
+
+    // Prefer addition if the second operand is sign-extended, in the
+    // hope of using AGF.
+ unsigned IndexOpcode = Index->getOpcode();
+ if (IndexOpcode == ISD::SIGN_EXTEND ||
+ IndexOpcode == ISD::SIGN_EXTEND_INREG)
+ return false;
+ }
+
+ // Don't use LA for two-operand addition if either operand is only
+ // used once. The addition instructions are better in that case.
+ if (Base->hasOneUse())
+ return false;
+
+ return true;
+}
+
+// Return true if Addr is suitable for AM, updating AM if so.
+bool SystemZDAGToDAGISel::selectAddress(SDValue Addr,
+ SystemZAddressingMode &AM) const {
+ // Start out assuming that the address will need to be loaded separately,
+ // then try to extend it as much as we can.
+ AM.Base = Addr;
+
+ // First try treating the address as a constant.
+ if (Addr.getOpcode() == ISD::Constant &&
+ expandDisp(AM, true, SDValue(),
+ cast<ConstantSDNode>(Addr)->getSExtValue()))
+ ;
+ // Also see if it's a bare ADJDYNALLOC.
+ else if (Addr.getOpcode() == SystemZISD::ADJDYNALLOC &&
+ expandAdjDynAlloc(AM, true, SDValue()))
+ ;
+ else
+ // Otherwise try expanding each component.
+ while (expandAddress(AM, true) ||
+ (AM.Index.getNode() && expandAddress(AM, false)))
+ continue;
+
+ // Reject cases where it isn't profitable to use LA(Y).
+ if (AM.Form == SystemZAddressingMode::FormBDXLA &&
+ !shouldUseLA(AM.Base.getNode(), AM.Disp, AM.Index.getNode()))
+ return false;
+
+ // Reject cases where the other instruction in a pair should be used.
+ if (!isValidDisp(AM.DR, AM.Disp))
+ return false;
+
+ // Make sure that ADJDYNALLOC is included where necessary.
+ if (AM.isDynAlloc() && !AM.IncludesDynAlloc)
+ return false;
+
+ DEBUG(AM.dump());
+ return true;
+}
+
+// Insert a node into the DAG at least before Pos. This will reposition
+// the node as needed, and will assign it a node ID that is <= Pos's ID.
+// Note that this does *not* preserve the uniqueness of node IDs!
+// The selection DAG must no longer depend on their uniqueness when this
+// function is used.
+static void insertDAGNode(SelectionDAG *DAG, SDNode *Pos, SDValue N) {
+ if (N.getNode()->getNodeId() == -1 ||
+ N.getNode()->getNodeId() > Pos->getNodeId()) {
+ DAG->RepositionNode(Pos->getIterator(), N.getNode());
+ N.getNode()->setNodeId(Pos->getNodeId());
+ }
+}
+
+void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM,
+ EVT VT, SDValue &Base,
+ SDValue &Disp) const {
+ Base = AM.Base;
+ if (!Base.getNode())
+ // Register 0 means "no base". This is mostly useful for shifts.
+ Base = CurDAG->getRegister(0, VT);
+ else if (Base.getOpcode() == ISD::FrameIndex) {
+ // Lower a FrameIndex to a TargetFrameIndex.
+ int64_t FrameIndex = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FrameIndex, VT);
+ } else if (Base.getValueType() != VT) {
+ // Truncate values from i64 to i32, for shifts.
+ assert(VT == MVT::i32 && Base.getValueType() == MVT::i64 &&
+ "Unexpected truncation");
+ SDLoc DL(Base);
+ SDValue Trunc = CurDAG->getNode(ISD::TRUNCATE, DL, VT, Base);
+ insertDAGNode(CurDAG, Base.getNode(), Trunc);
+ Base = Trunc;
+ }
+
+ // Lower the displacement to a TargetConstant.
+ Disp = CurDAG->getTargetConstant(AM.Disp, SDLoc(Base), VT);
+}
+
+void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM,
+ EVT VT, SDValue &Base,
+ SDValue &Disp,
+ SDValue &Index) const {
+ getAddressOperands(AM, VT, Base, Disp);
+
+ Index = AM.Index;
+ if (!Index.getNode())
+ // Register 0 means "no index".
+ Index = CurDAG->getRegister(0, VT);
+}
+
+bool SystemZDAGToDAGISel::selectBDAddr(SystemZAddressingMode::DispRange DR,
+ SDValue Addr, SDValue &Base,
+ SDValue &Disp) const {
+ SystemZAddressingMode AM(SystemZAddressingMode::FormBD, DR);
+ if (!selectAddress(Addr, AM))
+ return false;
+
+ getAddressOperands(AM, Addr.getValueType(), Base, Disp);
+ return true;
+}
+
+bool SystemZDAGToDAGISel::selectMVIAddr(SystemZAddressingMode::DispRange DR,
+ SDValue Addr, SDValue &Base,
+ SDValue &Disp) const {
+ SystemZAddressingMode AM(SystemZAddressingMode::FormBDXNormal, DR);
+ if (!selectAddress(Addr, AM) || AM.Index.getNode())
+ return false;
+
+ getAddressOperands(AM, Addr.getValueType(), Base, Disp);
+ return true;
+}
+
+bool SystemZDAGToDAGISel::selectBDXAddr(SystemZAddressingMode::AddrForm Form,
+ SystemZAddressingMode::DispRange DR,
+ SDValue Addr, SDValue &Base,
+ SDValue &Disp, SDValue &Index) const {
+ SystemZAddressingMode AM(Form, DR);
+ if (!selectAddress(Addr, AM))
+ return false;
+
+ getAddressOperands(AM, Addr.getValueType(), Base, Disp, Index);
+ return true;
+}
+
+bool SystemZDAGToDAGISel::selectBDVAddr12Only(SDValue Addr, SDValue Elem,
+ SDValue &Base,
+ SDValue &Disp,
+ SDValue &Index) const {
+ SDValue Regs[2];
+ if (selectBDXAddr12Only(Addr, Regs[0], Disp, Regs[1]) &&
+ Regs[0].getNode() && Regs[1].getNode()) {
+ for (unsigned int I = 0; I < 2; ++I) {
+ Base = Regs[I];
+ Index = Regs[1 - I];
+ // We can't tell here whether the index vector has the right type
+ // for the access; the caller needs to do that instead.
+ if (Index.getOpcode() == ISD::ZERO_EXTEND)
+ Index = Index.getOperand(0);
+ if (Index.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ Index.getOperand(1) == Elem) {
+ Index = Index.getOperand(0);
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+bool SystemZDAGToDAGISel::detectOrAndInsertion(SDValue &Op,
+ uint64_t InsertMask) const {
+ // We're only interested in cases where the insertion is into some operand
+ // of Op, rather than into Op itself. The only useful case is an AND.
+ if (Op.getOpcode() != ISD::AND)
+ return false;
+
+ // We need a constant mask.
+ auto *MaskNode = dyn_cast<ConstantSDNode>(Op.getOperand(1).getNode());
+ if (!MaskNode)
+ return false;
+
+ // It's not an insertion of Op.getOperand(0) if the two masks overlap.
+ uint64_t AndMask = MaskNode->getZExtValue();
+ if (InsertMask & AndMask)
+ return false;
+
+ // It's only an insertion if all bits are covered or are known to be zero.
+ // The inner check covers all cases but is more expensive.
+ uint64_t Used = allOnes(Op.getValueSizeInBits());
+ if (Used != (AndMask | InsertMask)) {
+ KnownBits Known;
+ CurDAG->computeKnownBits(Op.getOperand(0), Known);
+ if (Used != (AndMask | InsertMask | Known.Zero.getZExtValue()))
+ return false;
+ }
+
+ Op = Op.getOperand(0);
+ return true;
+}
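+
+// A quick worked case: with Op == (and X, 0xFFFFFFFFFFFFFF00) and
+// InsertMask == 0xFF, the two masks are disjoint and together cover all
+// 64 bits, so this is recognized as an insertion and Op becomes X.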
+
+bool SystemZDAGToDAGISel::refineRxSBGMask(RxSBGOperands &RxSBG,
+ uint64_t Mask) const {
+ const SystemZInstrInfo *TII = getInstrInfo();
+ if (RxSBG.Rotate != 0)
+ Mask = (Mask << RxSBG.Rotate) | (Mask >> (64 - RxSBG.Rotate));
+ Mask &= RxSBG.Mask;
+ if (TII->isRxSBGMask(Mask, RxSBG.BitSize, RxSBG.Start, RxSBG.End)) {
+ RxSBG.Mask = Mask;
+ return true;
+ }
+ return false;
+}
+
+// Return true if any bits of (RxSBG.Input & Mask) are significant.
+static bool maskMatters(RxSBGOperands &RxSBG, uint64_t Mask) {
+ // Rotate the mask in the same way as RxSBG.Input is rotated.
+ if (RxSBG.Rotate != 0)
+ Mask = ((Mask << RxSBG.Rotate) | (Mask >> (64 - RxSBG.Rotate)));
+ return (Mask & RxSBG.Mask) != 0;
+}
+
+bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const {
+ SDValue N = RxSBG.Input;
+ unsigned Opcode = N.getOpcode();
+ switch (Opcode) {
+ case ISD::TRUNCATE: {
+ if (RxSBG.Opcode == SystemZ::RNSBG)
+ return false;
+ uint64_t BitSize = N.getValueSizeInBits();
+ uint64_t Mask = allOnes(BitSize);
+ if (!refineRxSBGMask(RxSBG, Mask))
+ return false;
+ RxSBG.Input = N.getOperand(0);
+ return true;
+ }
+ case ISD::AND: {
+ if (RxSBG.Opcode == SystemZ::RNSBG)
+ return false;
+
+ auto *MaskNode = dyn_cast<ConstantSDNode>(N.getOperand(1).getNode());
+ if (!MaskNode)
+ return false;
+
+ SDValue Input = N.getOperand(0);
+ uint64_t Mask = MaskNode->getZExtValue();
+ if (!refineRxSBGMask(RxSBG, Mask)) {
+ // If some bits of Input are already known zeros, those bits will have
+ // been removed from the mask. See if adding them back in makes the
+ // mask suitable.
+ KnownBits Known;
+ CurDAG->computeKnownBits(Input, Known);
+ Mask |= Known.Zero.getZExtValue();
+ if (!refineRxSBGMask(RxSBG, Mask))
+ return false;
+ }
+ RxSBG.Input = Input;
+ return true;
+ }
+
+ case ISD::OR: {
+ if (RxSBG.Opcode != SystemZ::RNSBG)
+ return false;
+
+ auto *MaskNode = dyn_cast<ConstantSDNode>(N.getOperand(1).getNode());
+ if (!MaskNode)
+ return false;
+
+ SDValue Input = N.getOperand(0);
+ uint64_t Mask = ~MaskNode->getZExtValue();
+ if (!refineRxSBGMask(RxSBG, Mask)) {
+ // If some bits of Input are already known ones, those bits will have
+ // been removed from the mask. See if adding them back in makes the
+ // mask suitable.
+ KnownBits Known;
+ CurDAG->computeKnownBits(Input, Known);
+ Mask &= ~Known.One.getZExtValue();
+ if (!refineRxSBGMask(RxSBG, Mask))
+ return false;
+ }
+ RxSBG.Input = Input;
+ return true;
+ }
+
+ case ISD::ROTL: {
+ // Any 64-bit rotate left can be merged into the RxSBG.
+ if (RxSBG.BitSize != 64 || N.getValueType() != MVT::i64)
+ return false;
+ auto *CountNode = dyn_cast<ConstantSDNode>(N.getOperand(1).getNode());
+ if (!CountNode)
+ return false;
+
+ RxSBG.Rotate = (RxSBG.Rotate + CountNode->getZExtValue()) & 63;
+ RxSBG.Input = N.getOperand(0);
+ return true;
+ }
+
+ case ISD::ANY_EXTEND:
+ // Bits above the extended operand are don't-care.
+ RxSBG.Input = N.getOperand(0);
+ return true;
+
+ case ISD::ZERO_EXTEND:
+ if (RxSBG.Opcode != SystemZ::RNSBG) {
+ // Restrict the mask to the extended operand.
+ unsigned InnerBitSize = N.getOperand(0).getValueSizeInBits();
+ if (!refineRxSBGMask(RxSBG, allOnes(InnerBitSize)))
+ return false;
+
+ RxSBG.Input = N.getOperand(0);
+ return true;
+ }
+ LLVM_FALLTHROUGH;
+
+ case ISD::SIGN_EXTEND: {
+ // Check that the extension bits are don't-care (i.e. are masked out
+ // by the final mask).
+ unsigned InnerBitSize = N.getOperand(0).getValueSizeInBits();
+ if (maskMatters(RxSBG, allOnes(RxSBG.BitSize) - allOnes(InnerBitSize)))
+ return false;
+
+ RxSBG.Input = N.getOperand(0);
+ return true;
+ }
+
+ case ISD::SHL: {
+ auto *CountNode = dyn_cast<ConstantSDNode>(N.getOperand(1).getNode());
+ if (!CountNode)
+ return false;
+
+ uint64_t Count = CountNode->getZExtValue();
+ unsigned BitSize = N.getValueSizeInBits();
+ if (Count < 1 || Count >= BitSize)
+ return false;
+
+ if (RxSBG.Opcode == SystemZ::RNSBG) {
+ // Treat (shl X, count) as (rotl X, size-count) as long as the bottom
+ // count bits from RxSBG.Input are ignored.
+ if (maskMatters(RxSBG, allOnes(Count)))
+ return false;
+ } else {
+ // Treat (shl X, count) as (and (rotl X, count), ~0<<count).
+ if (!refineRxSBGMask(RxSBG, allOnes(BitSize - Count) << Count))
+ return false;
+ }
+
+ RxSBG.Rotate = (RxSBG.Rotate + Count) & 63;
+ RxSBG.Input = N.getOperand(0);
+ return true;
+ }
+
+ case ISD::SRL:
+ case ISD::SRA: {
+ auto *CountNode = dyn_cast<ConstantSDNode>(N.getOperand(1).getNode());
+ if (!CountNode)
+ return false;
+
+ uint64_t Count = CountNode->getZExtValue();
+ unsigned BitSize = N.getValueSizeInBits();
+ if (Count < 1 || Count >= BitSize)
+ return false;
+
+ if (RxSBG.Opcode == SystemZ::RNSBG || Opcode == ISD::SRA) {
+ // Treat (srl|sra X, count) as (rotl X, size-count) as long as the top
+ // count bits from RxSBG.Input are ignored.
+ if (maskMatters(RxSBG, allOnes(Count) << (BitSize - Count)))
+ return false;
+ } else {
+      // Treat (srl X, count) as (and (rotl X, size-count), ~0>>count),
+      // which is similar to the SHL case above.
+ if (!refineRxSBGMask(RxSBG, allOnes(BitSize - Count)))
+ return false;
+ }
+
+ RxSBG.Rotate = (RxSBG.Rotate - Count) & 63;
+ RxSBG.Input = N.getOperand(0);
+ return true;
+ }
+ default:
+ return false;
+ }
+}
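+
+// To see how these cases compose, consider a hand-derived trace of
+// tryRISBGZero on N = (and (srl X, 8), 0xFF) with a 64-bit X:
+//   AND: Mask becomes 0xFF, giving Start = 56, End = 63.
+//   SRL: Rotate becomes (0 - 8) & 63 = 56, Input becomes X.
+// The result selects bits 56-63 of (rotl X, 56), i.e. byte extraction,
+// roughly "RISBG %dst, %X, 56, 128+63, 56" (128 being the zero flag;
+// the register names are hypothetical).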
+
+SDValue SystemZDAGToDAGISel::getUNDEF(const SDLoc &DL, EVT VT) const {
+ SDNode *N = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
+ return SDValue(N, 0);
+}
+
+SDValue SystemZDAGToDAGISel::convertTo(const SDLoc &DL, EVT VT,
+ SDValue N) const {
+ if (N.getValueType() == MVT::i32 && VT == MVT::i64)
+ return CurDAG->getTargetInsertSubreg(SystemZ::subreg_l32,
+ DL, VT, getUNDEF(DL, MVT::i64), N);
+ if (N.getValueType() == MVT::i64 && VT == MVT::i32)
+ return CurDAG->getTargetExtractSubreg(SystemZ::subreg_l32, DL, VT, N);
+ assert(N.getValueType() == VT && "Unexpected value types");
+ return N;
+}
+
+bool SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ if (!VT.isInteger() || VT.getSizeInBits() > 64)
+ return false;
+ RxSBGOperands RISBG(SystemZ::RISBG, SDValue(N, 0));
+ unsigned Count = 0;
+ while (expandRxSBG(RISBG))
+ // The widening or narrowing is expected to be free.
+ // Counting widening or narrowing as a saved operation will result in
+ // preferring an R*SBG over a simple shift/logical instruction.
+ if (RISBG.Input.getOpcode() != ISD::ANY_EXTEND &&
+ RISBG.Input.getOpcode() != ISD::TRUNCATE)
+ Count += 1;
+ if (Count == 0)
+ return false;
+
+ // Prefer to use normal shift instructions over RISBG, since they can handle
+ // all cases and are sometimes shorter.
+ if (Count == 1 && N->getOpcode() != ISD::AND)
+ return false;
+
+ // Prefer register extensions like LLC over RISBG. Also prefer to start
+ // out with normal ANDs if one instruction would be enough. We can convert
+ // these ANDs into an RISBG later if a three-address instruction is useful.
+ if (RISBG.Rotate == 0) {
+ bool PreferAnd = false;
+ // Prefer AND for any 32-bit and-immediate operation.
+ if (VT == MVT::i32)
+ PreferAnd = true;
+ // As well as for any 64-bit operation that can be implemented via LLC(R),
+ // LLH(R), LLGT(R), or one of the and-immediate instructions.
+ else if (RISBG.Mask == 0xff ||
+ RISBG.Mask == 0xffff ||
+ RISBG.Mask == 0x7fffffff ||
+ SystemZ::isImmLF(~RISBG.Mask) ||
+ SystemZ::isImmHF(~RISBG.Mask))
+ PreferAnd = true;
+ // And likewise for the LLZRGF instruction, which doesn't have a register
+ // to register version.
+ else if (auto *Load = dyn_cast<LoadSDNode>(RISBG.Input)) {
+ if (Load->getMemoryVT() == MVT::i32 &&
+ (Load->getExtensionType() == ISD::EXTLOAD ||
+ Load->getExtensionType() == ISD::ZEXTLOAD) &&
+ RISBG.Mask == 0xffffff00 &&
+ Subtarget->hasLoadAndZeroRightmostByte())
+ PreferAnd = true;
+ }
+ if (PreferAnd) {
+ // Replace the current node with an AND. Note that the current node
+ // might already be that same AND, in which case it is already CSE'd
+ // with it, and we must not call ReplaceNode.
+ SDValue In = convertTo(DL, VT, RISBG.Input);
+ SDValue Mask = CurDAG->getConstant(RISBG.Mask, DL, VT);
+ SDValue New = CurDAG->getNode(ISD::AND, DL, VT, In, Mask);
+ if (N != New.getNode()) {
+ insertDAGNode(CurDAG, N, Mask);
+ insertDAGNode(CurDAG, N, New);
+ ReplaceNode(N, New.getNode());
+ N = New.getNode();
+ }
+ // Now, select the machine opcode to implement this operation.
+ SelectCode(N);
+ return true;
+ }
+ }
+
+ unsigned Opcode = SystemZ::RISBG;
+ // Prefer RISBGN if available, since it does not clobber CC.
+ if (Subtarget->hasMiscellaneousExtensions())
+ Opcode = SystemZ::RISBGN;
+ EVT OpcodeVT = MVT::i64;
+ if (VT == MVT::i32 && Subtarget->hasHighWord()) {
+ Opcode = SystemZ::RISBMux;
+ OpcodeVT = MVT::i32;
+ RISBG.Start &= 31;
+ RISBG.End &= 31;
+ }
+ SDValue Ops[5] = {
+ getUNDEF(DL, OpcodeVT),
+ convertTo(DL, OpcodeVT, RISBG.Input),
+ CurDAG->getTargetConstant(RISBG.Start, DL, MVT::i32),
+ CurDAG->getTargetConstant(RISBG.End | 128, DL, MVT::i32),
+ CurDAG->getTargetConstant(RISBG.Rotate, DL, MVT::i32)
+ };
+ SDValue New = convertTo(
+ DL, VT, SDValue(CurDAG->getMachineNode(Opcode, DL, OpcodeVT, Ops), 0));
+ ReplaceUses(N, New.getNode());
+ CurDAG->RemoveDeadNode(N);
+ return true;
+}
+
+bool SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ if (!VT.isInteger() || VT.getSizeInBits() > 64)
+ return false;
+ // Try treating each operand of N as the second operand of the RxSBG
+ // and see which goes deepest.
+ RxSBGOperands RxSBG[] = {
+ RxSBGOperands(Opcode, N->getOperand(0)),
+ RxSBGOperands(Opcode, N->getOperand(1))
+ };
+ unsigned Count[] = { 0, 0 };
+ for (unsigned I = 0; I < 2; ++I)
+ while (expandRxSBG(RxSBG[I]))
+ // The widening or narrowing is expected to be free.
+ // Counting widening or narrowing as a saved operation will result in
+ // preferring an R*SBG over a simple shift/logical instruction.
+ if (RxSBG[I].Input.getOpcode() != ISD::ANY_EXTEND &&
+ RxSBG[I].Input.getOpcode() != ISD::TRUNCATE)
+ Count[I] += 1;
+
+ // Do nothing if neither operand is suitable.
+ if (Count[0] == 0 && Count[1] == 0)
+ return false;
+
+ // Pick the deepest second operand.
+ unsigned I = Count[0] > Count[1] ? 0 : 1;
+ SDValue Op0 = N->getOperand(I ^ 1);
+
+ // Prefer IC for character insertions from memory.
+ if (Opcode == SystemZ::ROSBG && (RxSBG[I].Mask & 0xff) == 0)
+ if (auto *Load = dyn_cast<LoadSDNode>(Op0.getNode()))
+ if (Load->getMemoryVT() == MVT::i8)
+ return false;
+
+ // See whether we can avoid an AND in the first operand by converting
+ // ROSBG to RISBG.
+ if (Opcode == SystemZ::ROSBG && detectOrAndInsertion(Op0, RxSBG[I].Mask)) {
+ Opcode = SystemZ::RISBG;
+ // Prefer RISBGN if available, since it does not clobber CC.
+ if (Subtarget->hasMiscellaneousExtensions())
+ Opcode = SystemZ::RISBGN;
+ }
+
+ SDValue Ops[5] = {
+ convertTo(DL, MVT::i64, Op0),
+ convertTo(DL, MVT::i64, RxSBG[I].Input),
+ CurDAG->getTargetConstant(RxSBG[I].Start, DL, MVT::i32),
+ CurDAG->getTargetConstant(RxSBG[I].End, DL, MVT::i32),
+ CurDAG->getTargetConstant(RxSBG[I].Rotate, DL, MVT::i32)
+ };
+ SDValue New = convertTo(
+ DL, VT, SDValue(CurDAG->getMachineNode(Opcode, DL, MVT::i64, Ops), 0));
+ ReplaceNode(N, New.getNode());
+ return true;
+}
+
+void SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node,
+ SDValue Op0, uint64_t UpperVal,
+ uint64_t LowerVal) {
+ EVT VT = Node->getValueType(0);
+ SDLoc DL(Node);
+ SDValue Upper = CurDAG->getConstant(UpperVal, DL, VT);
+ if (Op0.getNode())
+ Upper = CurDAG->getNode(Opcode, DL, VT, Op0, Upper);
+
+ {
+ // When we haven't passed in Op0, Upper will be a constant. In order to
+ // prevent folding back to the large immediate in `Or = getNode(...)` we run
+ // SelectCode first and end up with an opaque machine node. This means that
+ // we need to use a handle to keep track of Upper in case it gets CSE'd by
+ // SelectCode.
+ //
+ // Note that in the case where Op0 is passed in we could just call
+ // SelectCode(Upper) later, along with the SelectCode(Or), and avoid needing
+ // the handle at all, but it's fine to do it here.
+ //
+ // TODO: This is a pretty hacky way to do this. Can we do something that
+ // doesn't require a two paragraph explanation?
+ HandleSDNode Handle(Upper);
+ SelectCode(Upper.getNode());
+ Upper = Handle.getValue();
+ }
+
+ SDValue Lower = CurDAG->getConstant(LowerVal, DL, VT);
+ SDValue Or = CurDAG->getNode(Opcode, DL, VT, Upper, Lower);
+
+ ReplaceUses(Node, Or.getNode());
+ CurDAG->RemoveDeadNode(Node);
+
+ SelectCode(Or.getNode());
+}
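+
+// For instance, the constant 0x123456789ABCDEF0 (in range of none of
+// LLILF, LLIHF or LGFI) is split by the callers below into
+//   UpperVal = Val - uint32_t(Val) = 0x1234567800000000
+//   LowerVal = uint32_t(Val)       = 0x000000009ABCDEF0
+// and rebuilt as (or Upper Lower); when Op0 is passed in, the same
+// split turns one 64-bit OR/XOR into two 32-bit-immediate operations.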
+
+bool SystemZDAGToDAGISel::tryGather(SDNode *N, unsigned Opcode) {
+ SDValue ElemV = N->getOperand(2);
+ auto *ElemN = dyn_cast<ConstantSDNode>(ElemV);
+ if (!ElemN)
+ return false;
+
+ unsigned Elem = ElemN->getZExtValue();
+ EVT VT = N->getValueType(0);
+ if (Elem >= VT.getVectorNumElements())
+ return false;
+
+ auto *Load = dyn_cast<LoadSDNode>(N->getOperand(1));
+ if (!Load || !Load->hasOneUse())
+ return false;
+ if (Load->getMemoryVT().getSizeInBits() !=
+ Load->getValueType(0).getSizeInBits())
+ return false;
+
+ SDValue Base, Disp, Index;
+ if (!selectBDVAddr12Only(Load->getBasePtr(), ElemV, Base, Disp, Index) ||
+ Index.getValueType() != VT.changeVectorElementTypeToInteger())
+ return false;
+
+ SDLoc DL(Load);
+ SDValue Ops[] = {
+ N->getOperand(0), Base, Disp, Index,
+ CurDAG->getTargetConstant(Elem, DL, MVT::i32), Load->getChain()
+ };
+ SDNode *Res = CurDAG->getMachineNode(Opcode, DL, VT, MVT::Other, Ops);
+ ReplaceUses(SDValue(Load, 1), SDValue(Res, 1));
+ ReplaceNode(N, Res);
+ return true;
+}
+
+bool SystemZDAGToDAGISel::tryScatter(StoreSDNode *Store, unsigned Opcode) {
+ SDValue Value = Store->getValue();
+ if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return false;
+ if (Store->getMemoryVT().getSizeInBits() != Value.getValueSizeInBits())
+ return false;
+
+ SDValue ElemV = Value.getOperand(1);
+ auto *ElemN = dyn_cast<ConstantSDNode>(ElemV);
+ if (!ElemN)
+ return false;
+
+ SDValue Vec = Value.getOperand(0);
+ EVT VT = Vec.getValueType();
+ unsigned Elem = ElemN->getZExtValue();
+ if (Elem >= VT.getVectorNumElements())
+ return false;
+
+ SDValue Base, Disp, Index;
+ if (!selectBDVAddr12Only(Store->getBasePtr(), ElemV, Base, Disp, Index) ||
+ Index.getValueType() != VT.changeVectorElementTypeToInteger())
+ return false;
+
+ SDLoc DL(Store);
+ SDValue Ops[] = {
+ Vec, Base, Disp, Index, CurDAG->getTargetConstant(Elem, DL, MVT::i32),
+ Store->getChain()
+ };
+ ReplaceNode(Store, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
+ return true;
+}
+
+bool SystemZDAGToDAGISel::canUseBlockOperation(StoreSDNode *Store,
+ LoadSDNode *Load) const {
+ // Check that the two memory operands have the same size.
+ if (Load->getMemoryVT() != Store->getMemoryVT())
+ return false;
+
+ // Volatility stops an access from being decomposed.
+ if (Load->isVolatile() || Store->isVolatile())
+ return false;
+
+ // There's no chance of overlap if the load is invariant.
+ if (Load->isInvariant() && Load->isDereferenceable())
+ return true;
+
+ // Otherwise we need to check whether there's an alias.
+ const Value *V1 = Load->getMemOperand()->getValue();
+ const Value *V2 = Store->getMemOperand()->getValue();
+ if (!V1 || !V2)
+ return false;
+
+ // Reject equality.
+ uint64_t Size = Load->getMemoryVT().getStoreSize();
+ int64_t End1 = Load->getSrcValueOffset() + Size;
+ int64_t End2 = Store->getSrcValueOffset() + Size;
+ if (V1 == V2 && End1 == End2)
+ return false;
+
+ return !AA->alias(MemoryLocation(V1, End1, Load->getAAInfo()),
+ MemoryLocation(V2, End2, Store->getAAInfo()));
+}
+
+bool SystemZDAGToDAGISel::storeLoadCanUseMVC(SDNode *N) const {
+ auto *Store = cast<StoreSDNode>(N);
+ auto *Load = cast<LoadSDNode>(Store->getValue());
+
+ // Prefer not to use MVC if either address can use ... RELATIVE LONG
+ // instructions.
+ uint64_t Size = Load->getMemoryVT().getStoreSize();
+ if (Size > 1 && Size <= 8) {
+ // Prefer LHRL, LRL and LGRL.
+ if (SystemZISD::isPCREL(Load->getBasePtr().getOpcode()))
+ return false;
+ // Prefer STHRL, STRL and STGRL.
+ if (SystemZISD::isPCREL(Store->getBasePtr().getOpcode()))
+ return false;
+ }
+
+ return canUseBlockOperation(Store, Load);
+}
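+
+// Sketch of the pattern this enables: an i64 copy such as
+//   %val = load i64, i64* %src
+//   store i64 %val, i64* %dst
+// can become a single 8-byte MVC when the accesses provably don't
+// overlap, unless either address is PC-relative, in which case the
+// LGRL/STGRL forms are preferred (as checked above).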
+
+bool SystemZDAGToDAGISel::storeLoadCanUseBlockBinary(SDNode *N,
+ unsigned I) const {
+ auto *StoreA = cast<StoreSDNode>(N);
+ auto *LoadA = cast<LoadSDNode>(StoreA->getValue().getOperand(1 - I));
+ auto *LoadB = cast<LoadSDNode>(StoreA->getValue().getOperand(I));
+ return !LoadA->isVolatile() && canUseBlockOperation(StoreA, LoadB);
+}
+
+void SystemZDAGToDAGISel::Select(SDNode *Node) {
+ // Dump information about the Node being selected
+ DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n");
+
+ // If we have a custom node, we already have selected!
+ if (Node->isMachineOpcode()) {
+ DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
+ Node->setNodeId(-1);
+ return;
+ }
+
+ unsigned Opcode = Node->getOpcode();
+ switch (Opcode) {
+ case ISD::OR:
+ if (Node->getOperand(1).getOpcode() != ISD::Constant)
+ if (tryRxSBG(Node, SystemZ::ROSBG))
+ return;
+ goto or_xor;
+
+ case ISD::XOR:
+ if (Node->getOperand(1).getOpcode() != ISD::Constant)
+ if (tryRxSBG(Node, SystemZ::RXSBG))
+ return;
+ // Fall through.
+ or_xor:
+ // If this is a 64-bit operation in which both 32-bit halves are nonzero,
+ // split the operation into two.
+ if (Node->getValueType(0) == MVT::i64)
+ if (auto *Op1 = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
+ uint64_t Val = Op1->getZExtValue();
+ if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val)) {
+ splitLargeImmediate(Opcode, Node, Node->getOperand(0),
+ Val - uint32_t(Val), uint32_t(Val));
+ return;
+ }
+ }
+ break;
+
+ case ISD::AND:
+ if (Node->getOperand(1).getOpcode() != ISD::Constant)
+ if (tryRxSBG(Node, SystemZ::RNSBG))
+ return;
+ LLVM_FALLTHROUGH;
+ case ISD::ROTL:
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::ZERO_EXTEND:
+ if (tryRISBGZero(Node))
+ return;
+ break;
+
+ case ISD::Constant:
+ // If this is a 64-bit constant that is out of the range of LLILF,
+ // LLIHF and LGFI, split it into two 32-bit pieces.
+ if (Node->getValueType(0) == MVT::i64) {
+ uint64_t Val = cast<ConstantSDNode>(Node)->getZExtValue();
+ if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val) && !isInt<32>(Val)) {
+ splitLargeImmediate(ISD::OR, Node, SDValue(), Val - uint32_t(Val),
+ uint32_t(Val));
+ return;
+ }
+ }
+ break;
+
+ case SystemZISD::SELECT_CCMASK: {
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ // Prefer to put any load first, so that it can be matched as a
+ // conditional load. Likewise for constants in range for LOCHI.
+ if ((Op1.getOpcode() == ISD::LOAD && Op0.getOpcode() != ISD::LOAD) ||
+ (Subtarget->hasLoadStoreOnCond2() &&
+ Node->getValueType(0).isInteger() &&
+ Op1.getOpcode() == ISD::Constant &&
+ isInt<16>(cast<ConstantSDNode>(Op1)->getSExtValue()) &&
+ !(Op0.getOpcode() == ISD::Constant &&
+ isInt<16>(cast<ConstantSDNode>(Op0)->getSExtValue())))) {
+ SDValue CCValid = Node->getOperand(2);
+ SDValue CCMask = Node->getOperand(3);
+ uint64_t ConstCCValid =
+ cast<ConstantSDNode>(CCValid.getNode())->getZExtValue();
+ uint64_t ConstCCMask =
+ cast<ConstantSDNode>(CCMask.getNode())->getZExtValue();
+ // Invert the condition.
+ CCMask = CurDAG->getConstant(ConstCCValid ^ ConstCCMask, SDLoc(Node),
+ CCMask.getValueType());
+ SDValue Op4 = Node->getOperand(4);
+ Node = CurDAG->UpdateNodeOperands(Node, Op1, Op0, CCValid, CCMask, Op4);
+ }
+ break;
+ }
+
+ case ISD::INSERT_VECTOR_ELT: {
+ EVT VT = Node->getValueType(0);
+ unsigned ElemBitSize = VT.getScalarSizeInBits();
+ if (ElemBitSize == 32) {
+ if (tryGather(Node, SystemZ::VGEF))
+ return;
+ } else if (ElemBitSize == 64) {
+ if (tryGather(Node, SystemZ::VGEG))
+ return;
+ }
+ break;
+ }
+
+ case ISD::STORE: {
+ auto *Store = cast<StoreSDNode>(Node);
+ unsigned ElemBitSize = Store->getValue().getValueSizeInBits();
+ if (ElemBitSize == 32) {
+ if (tryScatter(Store, SystemZ::VSCEF))
+ return;
+ } else if (ElemBitSize == 64) {
+ if (tryScatter(Store, SystemZ::VSCEG))
+ return;
+ }
+ break;
+ }
+ }
+
+ SelectCode(Node);
+}
+
+bool SystemZDAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op,
+ unsigned ConstraintID,
+ std::vector<SDValue> &OutOps) {
+ SystemZAddressingMode::AddrForm Form;
+ SystemZAddressingMode::DispRange DispRange;
+ SDValue Base, Disp, Index;
+
+ switch(ConstraintID) {
+ default:
+ llvm_unreachable("Unexpected asm memory constraint");
+ case InlineAsm::Constraint_i:
+ case InlineAsm::Constraint_Q:
+ // Accept an address with a short displacement, but no index.
+ Form = SystemZAddressingMode::FormBD;
+ DispRange = SystemZAddressingMode::Disp12Only;
+ break;
+ case InlineAsm::Constraint_R:
+ // Accept an address with a short displacement and an index.
+ Form = SystemZAddressingMode::FormBDXNormal;
+ DispRange = SystemZAddressingMode::Disp12Only;
+ break;
+ case InlineAsm::Constraint_S:
+ // Accept an address with a long displacement, but no index.
+ Form = SystemZAddressingMode::FormBD;
+ DispRange = SystemZAddressingMode::Disp20Only;
+ break;
+ case InlineAsm::Constraint_T:
+ case InlineAsm::Constraint_m:
+ // Accept an address with a long displacement and an index.
+ // m works the same as T, as this is the most general case.
+ Form = SystemZAddressingMode::FormBDXNormal;
+ DispRange = SystemZAddressingMode::Disp20Only;
+ break;
+ }
+
+ if (selectBDXAddr(Form, DispRange, Op, Base, Disp, Index)) {
+ const TargetRegisterClass *TRC =
+ Subtarget->getRegisterInfo()->getPointerRegClass(*MF);
+ SDLoc DL(Base);
+ SDValue RC = CurDAG->getTargetConstant(TRC->getID(), DL, MVT::i32);
+
+ // Make sure that the base address doesn't go into %r0.
+ // If it's a TargetFrameIndex or a fixed register, we shouldn't do anything.
+ if (Base.getOpcode() != ISD::TargetFrameIndex &&
+ Base.getOpcode() != ISD::Register) {
+ Base =
+ SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+ DL, Base.getValueType(),
+ Base, RC), 0);
+ }
+
+ // Make sure that the index register isn't assigned to %r0 either.
+ if (Index.getOpcode() != ISD::Register) {
+ Index =
+ SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+ DL, Index.getValueType(),
+ Index, RC), 0);
+ }
+
+ OutOps.push_back(Base);
+ OutOps.push_back(Disp);
+ OutOps.push_back(Index);
+ return false;
+ }
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
new file mode 100644
index 000000000000..fef4a8c92a36
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -0,0 +1,6397 @@
+//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SystemZTargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZISelLowering.h"
+#include "SystemZCallingConv.h"
+#include "SystemZConstantPoolValue.h"
+#include "SystemZMachineFunctionInfo.h"
+#include "SystemZTargetMachine.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/KnownBits.h"
+#include <cctype>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "systemz-lower"
+
+namespace {
+// Represents a sequence for extracting a 0/1 value from an IPM result:
+// (((X ^ XORValue) + AddValue) >> Bit)
+struct IPMConversion {
+ IPMConversion(unsigned xorValue, int64_t addValue, unsigned bit)
+ : XORValue(xorValue), AddValue(addValue), Bit(bit) {}
+
+ int64_t XORValue;
+ int64_t AddValue;
+ unsigned Bit;
+};
+
+// Represents information about a comparison.
+struct Comparison {
+ Comparison(SDValue Op0In, SDValue Op1In)
+ : Op0(Op0In), Op1(Op1In), Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
+
+ // The operands to the comparison.
+ SDValue Op0, Op1;
+
+ // The opcode that should be used to compare Op0 and Op1.
+ unsigned Opcode;
+
+ // A SystemZICMP value. Only used for integer comparisons.
+ unsigned ICmpType;
+
+ // The mask of CC values that Opcode can produce.
+ unsigned CCValid;
+
+ // The mask of CC values for which the original condition is true.
+ unsigned CCMask;
+};
+} // end anonymous namespace
+
+// Classify VT as either 32 or 64 bit.
+static bool is32Bit(EVT VT) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::i32:
+ return true;
+ case MVT::i64:
+ return false;
+ default:
+ llvm_unreachable("Unsupported type");
+ }
+}
+
+// Return a version of MachineOperand that can be safely used before the
+// final use.
+static MachineOperand earlyUseOperand(MachineOperand Op) {
+ if (Op.isReg())
+ Op.setIsKill(false);
+ return Op;
+}
+
+SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
+ const SystemZSubtarget &STI)
+ : TargetLowering(TM), Subtarget(STI) {
+ MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize());
+
+ // Set up the register classes.
+ if (Subtarget.hasHighWord())
+ addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
+ else
+ addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
+ addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
+ if (Subtarget.hasVector()) {
+ addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
+ addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
+ } else {
+ addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
+ addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
+ }
+ addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
+
+ if (Subtarget.hasVector()) {
+ addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
+ addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
+ addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
+ addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
+ addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
+ addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
+ }
+
+ // Compute derived properties from the register classes
+ computeRegisterProperties(Subtarget.getRegisterInfo());
+
+ // Set up special registers.
+ setStackPointerRegisterToSaveRestore(SystemZ::R15D);
+
+  // TODO: It may be better to default to latency-oriented scheduling, but
+  // LLVM's current latency-oriented scheduler can't handle physreg
+  // definitions such as SystemZ's CC, so set this to the register-pressure
+  // scheduler, because it can.
+ setSchedulingPreference(Sched::RegPressure);
+
+ setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+
+ // Instructions are strings of 2-byte aligned 2-byte values.
+ setMinFunctionAlignment(2);
+
+ // Handle operations that are handled in a similar way for all types.
+ for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
+ I <= MVT::LAST_FP_VALUETYPE;
+ ++I) {
+ MVT VT = MVT::SimpleValueType(I);
+ if (isTypeLegal(VT)) {
+ // Lower SET_CC into an IPM-based sequence.
+ setOperationAction(ISD::SETCC, VT, Custom);
+
+ // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
+ setOperationAction(ISD::SELECT, VT, Expand);
+
+ // Lower SELECT_CC and BR_CC into separate comparisons and branches.
+ setOperationAction(ISD::SELECT_CC, VT, Custom);
+ setOperationAction(ISD::BR_CC, VT, Custom);
+ }
+ }
+
+ // Expand jump table branches as address arithmetic followed by an
+ // indirect jump.
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+
+ // Expand BRCOND into a BR_CC (see above).
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+
+ // Handle integer types.
+ for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
+ I <= MVT::LAST_INTEGER_VALUETYPE;
+ ++I) {
+ MVT VT = MVT::SimpleValueType(I);
+ if (isTypeLegal(VT)) {
+ // Expand individual DIV and REMs into DIVREMs.
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+ setOperationAction(ISD::SDIVREM, VT, Custom);
+ setOperationAction(ISD::UDIVREM, VT, Custom);
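+      // (Illustrative: an sdiv and srem of the same i64 operands can
+      // then share one division, since the DSGR instruction produces
+      // both quotient and remainder.)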
+
+ // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
+ // stores, putting a serialization instruction after the stores.
+ setOperationAction(ISD::ATOMIC_LOAD, VT, Custom);
+ setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
+
+ // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
+ // available, or if the operand is constant.
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
+
+ // Use POPCNT on z196 and above.
+ if (Subtarget.hasPopulationCount())
+ setOperationAction(ISD::CTPOP, VT, Custom);
+ else
+ setOperationAction(ISD::CTPOP, VT, Expand);
+
+ // No special instructions for these.
+ setOperationAction(ISD::CTTZ, VT, Expand);
+ setOperationAction(ISD::ROTR, VT, Expand);
+
+ // Use *MUL_LOHI where possible instead of MULH*.
+ setOperationAction(ISD::MULHS, VT, Expand);
+ setOperationAction(ISD::MULHU, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Custom);
+ setOperationAction(ISD::UMUL_LOHI, VT, Custom);
+
+ // Only z196 and above have native support for conversions to unsigned.
+ // On z10, promoting to i64 doesn't generate an inexact condition for
+ // values that are outside the i32 range but in the i64 range, so use
+ // the default expansion.
+ if (!Subtarget.hasFPExtension())
+ setOperationAction(ISD::FP_TO_UINT, VT, Expand);
+ }
+ }
+
+ // Type legalization will convert 8- and 16-bit atomic operations into
+ // forms that operate on i32s (but still keeping the original memory VT).
+ // Lower them into full i32 operations.
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
+
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
+ // Traps are legal, as we will convert them to "j .+2".
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
+ // z10 has instructions for signed but not unsigned FP conversion.
+ // Handle unsigned 32-bit types as signed 64-bit types.
+ if (!Subtarget.hasFPExtension()) {
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
+ }
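+  // For example, a u32 -> f64 conversion is then done by zero-extending
+  // to i64 and using the signed i64 -> f64 conversion (CDGBR), which is
+  // exact for every u32 value.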
+
+ // We have native support for a 64-bit CTLZ, via FLOGR.
+ setOperationAction(ISD::CTLZ, MVT::i32, Promote);
+ setOperationAction(ISD::CTLZ, MVT::i64, Legal);
+
+ // Give LowerOperation the chance to replace 64-bit ORs with subregs.
+ setOperationAction(ISD::OR, MVT::i64, Custom);
+
+ // FIXME: Can we support these natively?
+ setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
+
+ // We have native instructions for i8, i16 and i32 extensions, but not i1.
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
+ }
+
+ // Handle the various types of symbolic address.
+ setOperationAction(ISD::ConstantPool, PtrVT, Custom);
+ setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
+ setOperationAction(ISD::BlockAddress, PtrVT, Custom);
+ setOperationAction(ISD::JumpTable, PtrVT, Custom);
+
+ // We need to handle dynamic allocations specially because of the
+ // 160-byte area at the bottom of the stack.
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
+ setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom);
+
+ // Use custom expanders so that we can force the function to use
+ // a frame pointer.
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Custom);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
+
+ // Handle prefetches with PFD or PFDRL.
+ setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
+
+ for (MVT VT : MVT::vector_valuetypes()) {
+ // Assume by default that all vector operations need to be expanded.
+ for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
+ if (getOperationAction(Opcode, VT) == Legal)
+ setOperationAction(Opcode, VT, Expand);
+
+ // Likewise all truncating stores and extending loads.
+ for (MVT InnerVT : MVT::vector_valuetypes()) {
+ setTruncStoreAction(VT, InnerVT, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
+ }
+
+ if (isTypeLegal(VT)) {
+ // These operations are legal for anything that can be stored in a
+ // vector register, even if there is no native support for the format
+ // as such. In particular, we can do these for v4f32 even though there
+ // are no specific instructions for that format.
+ setOperationAction(ISD::LOAD, VT, Legal);
+ setOperationAction(ISD::STORE, VT, Legal);
+ setOperationAction(ISD::VSELECT, VT, Legal);
+ setOperationAction(ISD::BITCAST, VT, Legal);
+ setOperationAction(ISD::UNDEF, VT, Legal);
+
+ // Likewise, except that we need to replace the nodes with something
+ // more specific.
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ }
+ }
+
+ // Handle integer vector types.
+ for (MVT VT : MVT::integer_vector_valuetypes()) {
+ if (isTypeLegal(VT)) {
+ // These operations have direct equivalents.
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal);
+ setOperationAction(ISD::ADD, VT, Legal);
+ setOperationAction(ISD::SUB, VT, Legal);
+ if (VT != MVT::v2i64)
+ setOperationAction(ISD::MUL, VT, Legal);
+ setOperationAction(ISD::AND, VT, Legal);
+ setOperationAction(ISD::OR, VT, Legal);
+ setOperationAction(ISD::XOR, VT, Legal);
+ setOperationAction(ISD::CTPOP, VT, Custom);
+ setOperationAction(ISD::CTTZ, VT, Legal);
+ setOperationAction(ISD::CTLZ, VT, Legal);
+
+ // Convert a GPR scalar to a vector by inserting it into element 0.
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+
+ // Use a series of unpacks for extensions.
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
+
+ // Detect shifts by a scalar amount and convert them into
+ // V*_BY_SCALAR.
+ setOperationAction(ISD::SHL, VT, Custom);
+ setOperationAction(ISD::SRA, VT, Custom);
+ setOperationAction(ISD::SRL, VT, Custom);
+
+ // At present ROTL isn't matched by DAGCombiner. ROTR should be
+ // converted into ROTL.
+ setOperationAction(ISD::ROTL, VT, Expand);
+ setOperationAction(ISD::ROTR, VT, Expand);
+
+ // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
+ // and inverting the result as necessary.
+ setOperationAction(ISD::SETCC, VT, Custom);
+ }
+ }
+
+ if (Subtarget.hasVector()) {
+ // There should be no need to check for float types other than v2f64
+ // since <2 x f32> isn't a legal type.
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal);
+ }
+
+ // Handle floating-point types.
+ for (unsigned I = MVT::FIRST_FP_VALUETYPE;
+ I <= MVT::LAST_FP_VALUETYPE;
+ ++I) {
+ MVT VT = MVT::SimpleValueType(I);
+ if (isTypeLegal(VT)) {
+ // We can use FI for FRINT.
+ setOperationAction(ISD::FRINT, VT, Legal);
+
+ // We can use the extended form of FI for other rounding operations.
+ if (Subtarget.hasFPExtension()) {
+ setOperationAction(ISD::FNEARBYINT, VT, Legal);
+ setOperationAction(ISD::FFLOOR, VT, Legal);
+ setOperationAction(ISD::FCEIL, VT, Legal);
+ setOperationAction(ISD::FTRUNC, VT, Legal);
+ setOperationAction(ISD::FROUND, VT, Legal);
+ }
+
+ // No special instructions for these.
+ setOperationAction(ISD::FSIN, VT, Expand);
+ setOperationAction(ISD::FCOS, VT, Expand);
+ setOperationAction(ISD::FSINCOS, VT, Expand);
+ setOperationAction(ISD::FREM, VT, Expand);
+ setOperationAction(ISD::FPOW, VT, Expand);
+ }
+ }
+
+ // Handle floating-point vector types.
+ if (Subtarget.hasVector()) {
+ // Scalar-to-vector conversion is just a subreg.
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
+
+ // Some insertions and extractions can be done directly but others
+ // need to go via integers.
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
+
+ // These operations have direct equivalents.
+ setOperationAction(ISD::FADD, MVT::v2f64, Legal);
+ setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
+ setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
+ setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
+ setOperationAction(ISD::FMA, MVT::v2f64, Legal);
+ setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
+ setOperationAction(ISD::FABS, MVT::v2f64, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
+ setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
+ }
+
+ // We have fused multiply-addition for f32 and f64 but not f128.
+ setOperationAction(ISD::FMA, MVT::f32, Legal);
+ setOperationAction(ISD::FMA, MVT::f64, Legal);
+ setOperationAction(ISD::FMA, MVT::f128, Expand);
+
+ // Needed so that we don't try to implement f128 constant loads using
+ // a load-and-extend of a f80 constant (in cases where the constant
+ // would fit in an f80).
+ for (MVT VT : MVT::fp_valuetypes())
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
+
+ // Floating-point truncation and stores need to be done separately.
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f64, Expand);
+
+ // We have 64-bit FPR<->GPR moves, but need special handling for
+ // 32-bit forms.
+ if (!Subtarget.hasVector()) {
+ setOperationAction(ISD::BITCAST, MVT::i32, Custom);
+ setOperationAction(ISD::BITCAST, MVT::f32, Custom);
+ }
+
+ // VASTART and VACOPY need to deal with the SystemZ-specific varargs
+ // structure, but VAEND is a no-op.
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VACOPY, MVT::Other, Custom);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+
+ // Codes for which we want to perform some z-specific combinations.
+ setTargetDAGCombine(ISD::SIGN_EXTEND);
+ setTargetDAGCombine(ISD::STORE);
+ setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::FP_ROUND);
+ setTargetDAGCombine(ISD::BSWAP);
+ setTargetDAGCombine(ISD::SHL);
+ setTargetDAGCombine(ISD::SRA);
+ setTargetDAGCombine(ISD::SRL);
+ setTargetDAGCombine(ISD::ROTL);
+
+ // Handle intrinsics.
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+ // We want to use MVC in preference to even a single load/store pair.
+ MaxStoresPerMemcpy = 0;
+ MaxStoresPerMemcpyOptSize = 0;
+
+ // The main memset sequence is a byte store followed by an MVC.
+  // Two STC or MV..I stores win over that, but the kinds of fused stores
+  // generated by target-independent code don't when the byte value is
+ // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
+ // than "STC;MVC". Handle the choice in target-specific code instead.
+ MaxStoresPerMemset = 0;
+ MaxStoresPerMemsetOptSize = 0;
+}
+
+EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
+ LLVMContext &, EVT VT) const {
+ if (!VT.isVector())
+ return MVT::i32;
+ return VT.changeVectorElementTypeToInteger();
+}
+
+bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+ VT = VT.getScalarType();
+
+ if (!VT.isSimple())
+ return false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::f32:
+ case MVT::f64:
+ return true;
+ case MVT::f128:
+ return false;
+ default:
+ break;
+ }
+
+ return false;
+}
+
+bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
+ return Imm.isZero() || Imm.isNegZero();
+}
+
+bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ // We can use CGFI or CLGFI.
+ return isInt<32>(Imm) || isUInt<32>(Imm);
+}
+
+bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
+ // We can use ALGFI or SLGFI.
+ return isUInt<32>(Imm) || isUInt<32>(-Imm);
+}
+
+bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
+ unsigned,
+ unsigned,
+ bool *Fast) const {
+ // Unaligned accesses should never be slower than the expanded version.
+ // We check specifically for aligned accesses in the few cases where
+ // they are required.
+ if (Fast)
+ *Fast = true;
+ return true;
+}
+
+bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
+ const AddrMode &AM, Type *Ty,
+ unsigned AS) const {
+ // Punt on globals for now, although they can be used in limited
+ // RELATIVE LONG cases.
+ if (AM.BaseGV)
+ return false;
+
+ // Require a 20-bit signed offset.
+ if (!isInt<20>(AM.BaseOffs))
+ return false;
+
+ // Indexing is OK but no scale factor can be applied.
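+  // (SystemZ addresses are formed as base + index + displacement, so a
+  // scale of 1 simply consumes the index register, while larger scales
+  // would need a separate shift or multiply.)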
+ return AM.Scale == 0 || AM.Scale == 1;
+}
+
+bool SystemZTargetLowering::isFoldableMemAccessOffset(Instruction *I,
+ int64_t Offset) const {
+ // This only applies to z13.
+ if (!Subtarget.hasVector())
+ return true;
+
+ // * Use LDE instead of LE/LEY to avoid partial register
+ // dependencies (LDE only supports small offsets).
+ // * Utilize the vector registers to hold floating point
+ // values (vector load / store instructions only support small
+ // offsets).
+
+ assert (isa<LoadInst>(I) || isa<StoreInst>(I));
+ Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
+ I->getOperand(0)->getType());
+ bool IsFPAccess = MemAccessTy->isFloatingPointTy();
+ bool IsVectorAccess = MemAccessTy->isVectorTy();
+
+ // A store of an extracted vector element will be combined into a VSTE type
+ // instruction.
+ if (!IsVectorAccess && isa<StoreInst>(I)) {
+ Value *DataOp = I->getOperand(0);
+ if (isa<ExtractElementInst>(DataOp))
+ IsVectorAccess = true;
+ }
+
+ // A load which gets inserted into a vector element will be combined into a
+ // VLE type instruction.
+ if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
+ User *LoadUser = *I->user_begin();
+ if (isa<InsertElementInst>(LoadUser))
+ IsVectorAccess = true;
+ }
+
+ if (!isUInt<12>(Offset) && (IsFPAccess || IsVectorAccess))
+ return false;
+
+ return true;
+}
+
+bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
+ if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
+ return false;
+ unsigned FromBits = FromType->getPrimitiveSizeInBits();
+ unsigned ToBits = ToType->getPrimitiveSizeInBits();
+ return FromBits > ToBits;
+}
+
+bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const {
+ if (!FromVT.isInteger() || !ToVT.isInteger())
+ return false;
+ unsigned FromBits = FromVT.getSizeInBits();
+ unsigned ToBits = ToVT.getSizeInBits();
+ return FromBits > ToBits;
+}
+
+//===----------------------------------------------------------------------===//
+// Inline asm support
+//===----------------------------------------------------------------------===//
+
+TargetLowering::ConstraintType
+SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'a': // Address register
+ case 'd': // Data register (equivalent to 'r')
+ case 'f': // Floating-point register
+ case 'h': // High-part register
+ case 'r': // General-purpose register
+ return C_RegisterClass;
+
+ case 'Q': // Memory with base and unsigned 12-bit displacement
+ case 'R': // Likewise, plus an index
+ case 'S': // Memory with base and signed 20-bit displacement
+ case 'T': // Likewise, plus an index
+ case 'm': // Equivalent to 'T'.
+ return C_Memory;
+
+ case 'I': // Unsigned 8-bit constant
+ case 'J': // Unsigned 12-bit constant
+ case 'K': // Signed 16-bit constant
+ case 'L': // Signed 20-bit displacement (on all targets we support)
+ case 'M': // 0x7fffffff
+ return C_Other;
+
+ default:
+ break;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+TargetLowering::ConstraintWeight SystemZTargetLowering::
+getSingleConstraintMatchWeight(AsmOperandInfo &info,
+ const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (!CallOperandVal)
+ return CW_Default;
+ Type *type = CallOperandVal->getType();
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+
+ case 'a': // Address register
+ case 'd': // Data register (equivalent to 'r')
+ case 'h': // High-part register
+ case 'r': // General-purpose register
+ if (CallOperandVal->getType()->isIntegerTy())
+ weight = CW_Register;
+ break;
+
+ case 'f': // Floating-point register
+ if (type->isFloatingPointTy())
+ weight = CW_Register;
+ break;
+
+ case 'I': // Unsigned 8-bit constant
+ if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
+ if (isUInt<8>(C->getZExtValue()))
+ weight = CW_Constant;
+ break;
+
+ case 'J': // Unsigned 12-bit constant
+ if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
+ if (isUInt<12>(C->getZExtValue()))
+ weight = CW_Constant;
+ break;
+
+ case 'K': // Signed 16-bit constant
+ if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
+ if (isInt<16>(C->getSExtValue()))
+ weight = CW_Constant;
+ break;
+
+ case 'L': // Signed 20-bit displacement (on all targets we support)
+ if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
+ if (isInt<20>(C->getSExtValue()))
+ weight = CW_Constant;
+ break;
+
+ case 'M': // 0x7fffffff
+ if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
+ if (C->getZExtValue() == 0x7fffffff)
+ weight = CW_Constant;
+ break;
+ }
+ return weight;
+}
+
+// Parse a "{tNNN}" register constraint for which the register type "t"
+// has already been verified. MC is the class associated with "t" and
+// Map maps 0-based register numbers to LLVM register numbers.
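+// For example (illustrative), the constraint "{f2}" parsed against
+// FP64Regs yields the internal register F2D in FP64BitRegClass.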
+static std::pair<unsigned, const TargetRegisterClass *>
+parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC,
+ const unsigned *Map) {
+ assert(*(Constraint.end()-1) == '}' && "Missing '}'");
+ if (isdigit(Constraint[2])) {
+ unsigned Index;
+ bool Failed =
+ Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
+ if (!Failed && Index < 16 && Map[Index])
+ return std::make_pair(Map[Index], RC);
+ }
+ return std::make_pair(0U, nullptr);
+}
+
+std::pair<unsigned, const TargetRegisterClass *>
+SystemZTargetLowering::getRegForInlineAsmConstraint(
+ const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
+ if (Constraint.size() == 1) {
+ // GCC Constraint Letters
+ switch (Constraint[0]) {
+ default: break;
+ case 'd': // Data register (equivalent to 'r')
+ case 'r': // General-purpose register
+ if (VT == MVT::i64)
+ return std::make_pair(0U, &SystemZ::GR64BitRegClass);
+ else if (VT == MVT::i128)
+ return std::make_pair(0U, &SystemZ::GR128BitRegClass);
+ return std::make_pair(0U, &SystemZ::GR32BitRegClass);
+
+ case 'a': // Address register
+ if (VT == MVT::i64)
+ return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
+ else if (VT == MVT::i128)
+ return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
+ return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
+
+ case 'h': // High-part register (an LLVM extension)
+ return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
+
+ case 'f': // Floating-point register
+ if (VT == MVT::f64)
+ return std::make_pair(0U, &SystemZ::FP64BitRegClass);
+ else if (VT == MVT::f128)
+ return std::make_pair(0U, &SystemZ::FP128BitRegClass);
+ return std::make_pair(0U, &SystemZ::FP32BitRegClass);
+ }
+ }
+ if (Constraint.size() > 0 && Constraint[0] == '{') {
+ // We need to override the default register parsing for GPRs and FPRs
+ // because the interpretation depends on VT. The internal names of
+ // the registers are also different from the external names
+ // (F0D and F0S instead of F0, etc.).
+ if (Constraint[1] == 'r') {
+ if (VT == MVT::i32)
+ return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
+ SystemZMC::GR32Regs);
+ if (VT == MVT::i128)
+ return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
+ SystemZMC::GR128Regs);
+ return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
+ SystemZMC::GR64Regs);
+ }
+ if (Constraint[1] == 'f') {
+ if (VT == MVT::f32)
+ return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
+ SystemZMC::FP32Regs);
+ if (VT == MVT::f128)
+ return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
+ SystemZMC::FP128Regs);
+ return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
+ SystemZMC::FP64Regs);
+ }
+ }
+ return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
+}
+
+void SystemZTargetLowering::
+LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const {
+ // Only support length 1 constraints for now.
+ if (Constraint.length() == 1) {
+ switch (Constraint[0]) {
+ case 'I': // Unsigned 8-bit constant
+ if (auto *C = dyn_cast<ConstantSDNode>(Op))
+ if (isUInt<8>(C->getZExtValue()))
+ Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
+ Op.getValueType()));
+ return;
+
+ case 'J': // Unsigned 12-bit constant
+ if (auto *C = dyn_cast<ConstantSDNode>(Op))
+ if (isUInt<12>(C->getZExtValue()))
+ Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
+ Op.getValueType()));
+ return;
+
+ case 'K': // Signed 16-bit constant
+ if (auto *C = dyn_cast<ConstantSDNode>(Op))
+ if (isInt<16>(C->getSExtValue()))
+ Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
+ Op.getValueType()));
+ return;
+
+ case 'L': // Signed 20-bit displacement (on all targets we support)
+ if (auto *C = dyn_cast<ConstantSDNode>(Op))
+ if (isInt<20>(C->getSExtValue()))
+ Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
+ Op.getValueType()));
+ return;
+
+ case 'M': // 0x7fffffff
+ if (auto *C = dyn_cast<ConstantSDNode>(Op))
+ if (C->getZExtValue() == 0x7fffffff)
+ Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
+ Op.getValueType()));
+ return;
+ }
+ }
+ TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
+
+//===----------------------------------------------------------------------===//
+// Calling conventions
+//===----------------------------------------------------------------------===//
+
+#include "SystemZGenCallingConv.inc"
+
+bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
+ Type *ToType) const {
+ return isTruncateFree(FromType, ToType);
+}
+
+bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
+ return CI->isTailCall();
+}
+
+// We do not yet support 128-bit single-element vector types. If the user
+// attempts to use such types as a function argument or return type, prefer
+// to error out instead of emitting code violating the ABI.
+static void VerifyVectorType(MVT VT, EVT ArgVT) {
+ if (ArgVT.isVector() && !VT.isVector())
+ report_fatal_error("Unsupported vector argument or return type");
+}
+
+static void VerifyVectorTypes(const SmallVectorImpl<ISD::InputArg> &Ins) {
+ for (unsigned i = 0; i < Ins.size(); ++i)
+ VerifyVectorType(Ins[i].VT, Ins[i].ArgVT);
+}
+
+static void VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> &Outs) {
+ for (unsigned i = 0; i < Outs.size(); ++i)
+ VerifyVectorType(Outs[i].VT, Outs[i].ArgVT);
+}
+
+// Value is a value that has been passed to us in the location described by VA
+// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
+// any loads onto Chain.
+static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL,
+ CCValAssign &VA, SDValue Chain,
+ SDValue Value) {
+ // If the argument has been promoted from a smaller type, insert an
+ // assertion to capture this.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value,
+ DAG.getValueType(VA.getValVT()));
+
+ if (VA.isExtInLoc())
+ Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
+ else if (VA.getLocInfo() == CCValAssign::BCvt) {
+ // If this is a short vector argument loaded from the stack,
+ // extend from i64 to full vector size and then bitcast.
+ assert(VA.getLocVT() == MVT::i64);
+ assert(VA.getValVT().isVector());
+ Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
+ Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
+ } else
+ assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
+ return Value;
+}
+
+// Value is a value of type VA.getValVT() that we need to copy into
+// the location described by VA. Return a copy of Value converted to
+// VA.getLocVT(). The caller is responsible for handling indirect values.
+static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
+ CCValAssign &VA, SDValue Value) {
+ switch (VA.getLocInfo()) {
+ case CCValAssign::SExt:
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
+ case CCValAssign::ZExt:
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
+ case CCValAssign::AExt:
+ return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
+ case CCValAssign::BCvt:
+ // If this is a short vector argument to be stored to the stack,
+ // bitcast to v2i64 and then extract first element.
+ assert(VA.getLocVT() == MVT::i64);
+ assert(VA.getValVT().isVector());
+ Value = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Value);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
+ DAG.getConstant(0, DL, MVT::i32));
+ case CCValAssign::Full:
+ return Value;
+ default:
+ llvm_unreachable("Unhandled getLocInfo()");
+ }
+}
+
+SDValue SystemZTargetLowering::LowerFormalArguments(
+ SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
+ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ SystemZMachineFunctionInfo *FuncInfo =
+ MF.getInfo<SystemZMachineFunctionInfo>();
+ auto *TFL =
+ static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+
+ // Detect unsupported vector argument types.
+ if (Subtarget.hasVector())
+ VerifyVectorTypes(Ins);
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
+
+ unsigned NumFixedGPRs = 0;
+ unsigned NumFixedFPRs = 0;
+ for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
+ SDValue ArgValue;
+ CCValAssign &VA = ArgLocs[I];
+ EVT LocVT = VA.getLocVT();
+ if (VA.isRegLoc()) {
+ // Arguments passed in registers
+ const TargetRegisterClass *RC;
+ switch (LocVT.getSimpleVT().SimpleTy) {
+ default:
+ // Integers smaller than i64 should be promoted to i64.
+ llvm_unreachable("Unexpected argument type");
+ case MVT::i32:
+ NumFixedGPRs += 1;
+ RC = &SystemZ::GR32BitRegClass;
+ break;
+ case MVT::i64:
+ NumFixedGPRs += 1;
+ RC = &SystemZ::GR64BitRegClass;
+ break;
+ case MVT::f32:
+ NumFixedFPRs += 1;
+ RC = &SystemZ::FP32BitRegClass;
+ break;
+ case MVT::f64:
+ NumFixedFPRs += 1;
+ RC = &SystemZ::FP64BitRegClass;
+ break;
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v4f32:
+ case MVT::v2f64:
+ RC = &SystemZ::VR128BitRegClass;
+ break;
+ }
+
+ unsigned VReg = MRI.createVirtualRegister(RC);
+ MRI.addLiveIn(VA.getLocReg(), VReg);
+ ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
+ } else {
+ assert(VA.isMemLoc() && "Argument not register or memory");
+
+ // Create the frame index object for this incoming parameter.
+ int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8,
+ VA.getLocMemOffset(), true);
+
+ // Create the SelectionDAG nodes corresponding to a load
+ // from this parameter. Unpromoted ints and floats are
+ // passed as right-justified 8-byte values.
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
+ FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
+ DAG.getIntPtrConstant(4, DL));
+ ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
+ MachinePointerInfo::getFixedStack(MF, FI));
+ }
+
+ // Convert the value of the argument register into the value that's
+ // being passed.
+ if (VA.getLocInfo() == CCValAssign::Indirect) {
+ InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
+ MachinePointerInfo()));
+ // If the original argument was split (e.g. i128), we need
+ // to load all parts of it here (using the same address).
+ unsigned ArgIndex = Ins[I].OrigArgIndex;
+ assert (Ins[I].PartOffset == 0);
+ while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
+ CCValAssign &PartVA = ArgLocs[I + 1];
+ unsigned PartOffset = Ins[I + 1].PartOffset;
+ SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
+ DAG.getIntPtrConstant(PartOffset, DL));
+ InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
+ MachinePointerInfo()));
+ ++I;
+ }
+ } else
+ InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
+ }
+
+ if (IsVarArg) {
+ // Save the number of non-varargs registers for later use by va_start, etc.
+ FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
+ FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
+
+ // Likewise the address (in the form of a frame index) of where the
+ // first stack vararg would be. The 1-byte size here is arbitrary.
+ int64_t StackSize = CCInfo.getNextStackOffset();
+ FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
+
+ // ...and a similar frame index for the caller-allocated save area
+ // that will be used to store the incoming registers.
+ int64_t RegSaveOffset = TFL->getOffsetOfLocalArea();
+ unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
+ FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
+
+ // Store the FPR varargs in the reserved frame slots. (We store the
+ // GPRs as part of the prologue.)
+ if (NumFixedFPRs < SystemZ::NumArgFPRs) {
+ SDValue MemOps[SystemZ::NumArgFPRs];
+ for (unsigned I = NumFixedFPRs; I < SystemZ::NumArgFPRs; ++I) {
+ unsigned Offset = TFL->getRegSpillOffset(SystemZ::ArgFPRs[I]);
+ int FI = MFI.CreateFixedObject(8, RegSaveOffset + Offset, true);
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
+ unsigned VReg = MF.addLiveIn(SystemZ::ArgFPRs[I],
+ &SystemZ::FP64BitRegClass);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
+ MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
+ MachinePointerInfo::getFixedStack(MF, FI));
+ }
+ // Join the stores, which are independent of one another.
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ makeArrayRef(&MemOps[NumFixedFPRs],
+ SystemZ::NumArgFPRs-NumFixedFPRs));
+ }
+ }
+
+ return Chain;
+}
+
+static bool canUseSiblingCall(const CCState &ArgCCInfo,
+ SmallVectorImpl<CCValAssign> &ArgLocs,
+ SmallVectorImpl<ISD::OutputArg> &Outs) {
+ // Punt if there are any indirect or stack arguments, or if the call
+ // needs the callee-saved argument register R6, or if the call uses
+ // the callee-saved register arguments SwiftSelf and SwiftError.
+ for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
+ CCValAssign &VA = ArgLocs[I];
+ if (VA.getLocInfo() == CCValAssign::Indirect)
+ return false;
+ if (!VA.isRegLoc())
+ return false;
+ unsigned Reg = VA.getLocReg();
+ if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
+ return false;
+ if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
+ return false;
+ }
+ return true;
+}
+
+SDValue
+SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ SDLoc &DL = CLI.DL;
+ SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
+ SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
+ SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &IsTailCall = CLI.IsTailCall;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool IsVarArg = CLI.IsVarArg;
+ MachineFunction &MF = DAG.getMachineFunction();
+ EVT PtrVT = getPointerTy(MF.getDataLayout());
+
+ // Detect unsupported vector argument and return types.
+ if (Subtarget.hasVector()) {
+ VerifyVectorTypes(Outs);
+ VerifyVectorTypes(Ins);
+ }
+
+ // Analyze the operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
+ ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
+
+ // We don't support GuaranteedTailCallOpt, only automatically-detected
+ // sibling calls.
+ if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
+ IsTailCall = false;
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = ArgCCInfo.getNextStackOffset();
+
+ // Mark the start of the call.
+ if (!IsTailCall)
+ Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
+
+ // Copy argument values to their designated locations.
+ SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+ SDValue StackPtr;
+ for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
+ CCValAssign &VA = ArgLocs[I];
+ SDValue ArgValue = OutVals[I];
+
+ if (VA.getLocInfo() == CCValAssign::Indirect) {
+ // Store the argument in a stack slot and pass its address.
+ SDValue SpillSlot = DAG.CreateStackTemporary(Outs[I].ArgVT);
+ int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
+ MemOpChains.push_back(
+ DAG.getStore(Chain, DL, ArgValue, SpillSlot,
+ MachinePointerInfo::getFixedStack(MF, FI)));
+ // If the original argument was split (e.g. i128), we need
+ // to store all parts of it here (and pass just one address).
+ unsigned ArgIndex = Outs[I].OrigArgIndex;
+ assert (Outs[I].PartOffset == 0);
+ while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
+ SDValue PartValue = OutVals[I + 1];
+ unsigned PartOffset = Outs[I + 1].PartOffset;
+ SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
+ DAG.getIntPtrConstant(PartOffset, DL));
+ MemOpChains.push_back(
+ DAG.getStore(Chain, DL, PartValue, Address,
+ MachinePointerInfo::getFixedStack(MF, FI)));
+ ++I;
+ }
+ ArgValue = SpillSlot;
+ } else
+ ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
+
+ if (VA.isRegLoc())
+ // Queue up the argument copies and emit them at the end.
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
+ else {
+ assert(VA.isMemLoc() && "Argument not register or memory");
+
+ // Work out the address of the stack slot. Unpromoted ints and
+ // floats are passed as right-justified 8-byte values.
+ if (!StackPtr.getNode())
+ StackPtr = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, PtrVT);
+ unsigned Offset = SystemZMC::CallFrameSize + VA.getLocMemOffset();
+ if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
+ Offset += 4;
+ SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
+ DAG.getIntPtrConstant(Offset, DL));
+
+ // Emit the store.
+ MemOpChains.push_back(
+ DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
+ }
+ }
+
+ // Join the stores, which are independent of one another.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
+
+ // Accept direct calls by converting symbolic call addresses to the
+ // associated Target* opcodes. Force %r1 to be used for indirect
+ // tail calls.
+ SDValue Glue;
+ if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
+ Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
+ } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
+ Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
+ } else if (IsTailCall) {
+ Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
+ Glue = Chain.getValue(1);
+ Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
+ }
+
+ // Build a sequence of copy-to-reg nodes, chained and glued together.
+ for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
+ Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
+ RegsToPass[I].second, Glue);
+ Glue = Chain.getValue(1);
+ }
+
+ // The first call operand is the chain and the second is the target address.
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
+ Ops.push_back(DAG.getRegister(RegsToPass[I].first,
+ RegsToPass[I].second.getValueType()));
+
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
+ // Glue the call to the argument copies, if any.
+ if (Glue.getNode())
+ Ops.push_back(Glue);
+
+ // Emit the call.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ if (IsTailCall)
+ return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
+ Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
+ Glue = Chain.getValue(1);
+
+ // Mark the end of the call, which is glued to the call itself.
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getConstant(NumBytes, DL, PtrVT, true),
+ DAG.getConstant(0, DL, PtrVT, true),
+ Glue, DL);
+ Glue = Chain.getValue(1);
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RetLocs;
+ CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
+ RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
+ CCValAssign &VA = RetLocs[I];
+
+ // Copy the value out, gluing the copy to the end of the call sequence.
+ SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
+ VA.getLocVT(), Glue);
+ Chain = RetValue.getValue(1);
+ Glue = RetValue.getValue(2);
+
+ // Convert the value of the return register into the value that's
+ // being returned.
+ InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
+ }
+
+ return Chain;
+}
+
+bool SystemZTargetLowering::
+CanLowerReturn(CallingConv::ID CallConv,
+ MachineFunction &MF, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const {
+ // Detect unsupported vector return types.
+ if (Subtarget.hasVector())
+ VerifyVectorTypes(Outs);
+
+ // Special case that we cannot easily detect in RetCC_SystemZ since
+ // i128 is not a legal type.
+ for (auto &Out : Outs)
+ if (Out.ArgVT == MVT::i128)
+ return false;
+
+ SmallVector<CCValAssign, 16> RetLocs;
+ CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
+ return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
+}
+
+SDValue
+SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
+ bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SDLoc &DL, SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Detect unsupported vector return types.
+ if (Subtarget.hasVector())
+ VerifyVectorTypes(Outs);
+
+ // Assign locations to each returned value.
+ SmallVector<CCValAssign, 16> RetLocs;
+ CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
+ RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
+
+ // Quick exit for void returns
+ if (RetLocs.empty())
+ return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);
+
+ // Copy the result values into the output registers.
+ SDValue Glue;
+ SmallVector<SDValue, 4> RetOps;
+ RetOps.push_back(Chain);
+ for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
+ CCValAssign &VA = RetLocs[I];
+ SDValue RetValue = OutVals[I];
+
+ // Make the return register live on exit.
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ // Promote the value as required.
+ RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
+
+ // Chain and glue the copies together.
+ unsigned Reg = VA.getLocReg();
+ Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
+ Glue = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
+ }
+
+ // Update chain and glue.
+ RetOps[0] = Chain;
+ if (Glue.getNode())
+ RetOps.push_back(Glue);
+
+ return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps);
+}
+
+// Return true if Op is an intrinsic node with chain that returns the CC value
+// as its only (other) result. Provide the associated SystemZISD opcode and
+// the mask of valid CC values if so.
+static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
+ unsigned &CCValid) {
+ unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ switch (Id) {
+ case Intrinsic::s390_tbegin:
+ Opcode = SystemZISD::TBEGIN;
+ CCValid = SystemZ::CCMASK_TBEGIN;
+ return true;
+
+ case Intrinsic::s390_tbegin_nofloat:
+ Opcode = SystemZISD::TBEGIN_NOFLOAT;
+ CCValid = SystemZ::CCMASK_TBEGIN;
+ return true;
+
+ case Intrinsic::s390_tend:
+ Opcode = SystemZISD::TEND;
+ CCValid = SystemZ::CCMASK_TEND;
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+// Return true if Op is an intrinsic node without chain that returns the
+// CC value as its final result. Provide the associated SystemZISD
+// opcode and the mask of valid CC values if so.
+static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
+ unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ switch (Id) {
+ case Intrinsic::s390_vpkshs:
+ case Intrinsic::s390_vpksfs:
+ case Intrinsic::s390_vpksgs:
+ Opcode = SystemZISD::PACKS_CC;
+ CCValid = SystemZ::CCMASK_VCMP;
+ return true;
+
+ case Intrinsic::s390_vpklshs:
+ case Intrinsic::s390_vpklsfs:
+ case Intrinsic::s390_vpklsgs:
+ Opcode = SystemZISD::PACKLS_CC;
+ CCValid = SystemZ::CCMASK_VCMP;
+ return true;
+
+ case Intrinsic::s390_vceqbs:
+ case Intrinsic::s390_vceqhs:
+ case Intrinsic::s390_vceqfs:
+ case Intrinsic::s390_vceqgs:
+ Opcode = SystemZISD::VICMPES;
+ CCValid = SystemZ::CCMASK_VCMP;
+ return true;
+
+ case Intrinsic::s390_vchbs:
+ case Intrinsic::s390_vchhs:
+ case Intrinsic::s390_vchfs:
+ case Intrinsic::s390_vchgs:
+ Opcode = SystemZISD::VICMPHS;
+ CCValid = SystemZ::CCMASK_VCMP;
+ return true;
+
+ case Intrinsic::s390_vchlbs:
+ case Intrinsic::s390_vchlhs:
+ case Intrinsic::s390_vchlfs:
+ case Intrinsic::s390_vchlgs:
+ Opcode = SystemZISD::VICMPHLS;
+ CCValid = SystemZ::CCMASK_VCMP;
+ return true;
+
+ case Intrinsic::s390_vtm:
+ Opcode = SystemZISD::VTM;
+ CCValid = SystemZ::CCMASK_VCMP;
+ return true;
+
+ case Intrinsic::s390_vfaebs:
+ case Intrinsic::s390_vfaehs:
+ case Intrinsic::s390_vfaefs:
+ Opcode = SystemZISD::VFAE_CC;
+ CCValid = SystemZ::CCMASK_ANY;
+ return true;
+
+ case Intrinsic::s390_vfaezbs:
+ case Intrinsic::s390_vfaezhs:
+ case Intrinsic::s390_vfaezfs:
+ Opcode = SystemZISD::VFAEZ_CC;
+ CCValid = SystemZ::CCMASK_ANY;
+ return true;
+
+ case Intrinsic::s390_vfeebs:
+ case Intrinsic::s390_vfeehs:
+ case Intrinsic::s390_vfeefs:
+ Opcode = SystemZISD::VFEE_CC;
+ CCValid = SystemZ::CCMASK_ANY;
+ return true;
+
+ case Intrinsic::s390_vfeezbs:
+ case Intrinsic::s390_vfeezhs:
+ case Intrinsic::s390_vfeezfs:
+ Opcode = SystemZISD::VFEEZ_CC;
+ CCValid = SystemZ::CCMASK_ANY;
+ return true;
+
+ case Intrinsic::s390_vfenebs:
+ case Intrinsic::s390_vfenehs:
+ case Intrinsic::s390_vfenefs:
+ Opcode = SystemZISD::VFENE_CC;
+ CCValid = SystemZ::CCMASK_ANY;
+ return true;
+
+ case Intrinsic::s390_vfenezbs:
+ case Intrinsic::s390_vfenezhs:
+ case Intrinsic::s390_vfenezfs:
+ Opcode = SystemZISD::VFENEZ_CC;
+ CCValid = SystemZ::CCMASK_ANY;
+ return true;
+
+ case Intrinsic::s390_vistrbs:
+ case Intrinsic::s390_vistrhs:
+ case Intrinsic::s390_vistrfs:
+ Opcode = SystemZISD::VISTR_CC;
+ CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3;
+ return true;
+
+ case Intrinsic::s390_vstrcbs:
+ case Intrinsic::s390_vstrchs:
+ case Intrinsic::s390_vstrcfs:
+ Opcode = SystemZISD::VSTRC_CC;
+ CCValid = SystemZ::CCMASK_ANY;
+ return true;
+
+ case Intrinsic::s390_vstrczbs:
+ case Intrinsic::s390_vstrczhs:
+ case Intrinsic::s390_vstrczfs:
+ Opcode = SystemZISD::VSTRCZ_CC;
+ CCValid = SystemZ::CCMASK_ANY;
+ return true;
+
+ case Intrinsic::s390_vfcedbs:
+ Opcode = SystemZISD::VFCMPES;
+ CCValid = SystemZ::CCMASK_VCMP;
+ return true;
+
+ case Intrinsic::s390_vfchdbs:
+ Opcode = SystemZISD::VFCMPHS;
+ CCValid = SystemZ::CCMASK_VCMP;
+ return true;
+
+ case Intrinsic::s390_vfchedbs:
+ Opcode = SystemZISD::VFCMPHES;
+ CCValid = SystemZ::CCMASK_VCMP;
+ return true;
+
+ case Intrinsic::s390_vftcidb:
+ Opcode = SystemZISD::VFTCI;
+ CCValid = SystemZ::CCMASK_VCMP;
+ return true;
+
+ case Intrinsic::s390_tdc:
+ Opcode = SystemZISD::TDC;
+ CCValid = SystemZ::CCMASK_TDC;
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+// Emit an intrinsic with chain with a glued value instead of its CC result.
+static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op,
+ unsigned Opcode) {
+ // Copy all operands except the intrinsic ID.
+ unsigned NumOps = Op.getNumOperands();
+ SmallVector<SDValue, 6> Ops;
+ Ops.reserve(NumOps - 1);
+ Ops.push_back(Op.getOperand(0));
+ for (unsigned I = 2; I < NumOps; ++I)
+ Ops.push_back(Op.getOperand(I));
+
+ assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
+ SDVTList RawVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
+ SDValue OldChain = SDValue(Op.getNode(), 1);
+ SDValue NewChain = SDValue(Intr.getNode(), 0);
+ DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
+ return Intr;
+}
+
+// Emit an intrinsic with a glued value instead of its CC result.
+static SDValue emitIntrinsicWithGlue(SelectionDAG &DAG, SDValue Op,
+ unsigned Opcode) {
+ // Copy all operands except the intrinsic ID.
+ unsigned NumOps = Op.getNumOperands();
+ SmallVector<SDValue, 6> Ops;
+ Ops.reserve(NumOps - 1);
+ for (unsigned I = 1; I < NumOps; ++I)
+ Ops.push_back(Op.getOperand(I));
+
+ if (Op->getNumValues() == 1)
+ return DAG.getNode(Opcode, SDLoc(Op), MVT::Glue, Ops);
+ assert(Op->getNumValues() == 2 && "Expected exactly one non-CC result");
+ SDVTList RawVTs = DAG.getVTList(Op->getValueType(0), MVT::Glue);
+ return DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
+}
+
+// CC is a comparison that will be implemented using an integer or
+// floating-point comparison. Return the condition code mask for
+// a branch on true. In the integer case, CCMASK_CMP_UO is set for
+// unsigned comparisons and clear for signed ones. In the floating-point
+// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
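+// For example, SETGT maps to CCMASK_CMP_GT alone, while SETUGT maps to
+// CCMASK_CMP_UO | CCMASK_CMP_GT.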
+static unsigned CCMaskForCondCode(ISD::CondCode CC) {
+#define CONV(X) \
+ case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
+ case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
+ case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
+
+ switch (CC) {
+ default:
+ llvm_unreachable("Invalid integer condition!");
+
+ CONV(EQ);
+ CONV(NE);
+ CONV(GT);
+ CONV(GE);
+ CONV(LT);
+ CONV(LE);
+
+ case ISD::SETO: return SystemZ::CCMASK_CMP_O;
+ case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
+ }
+#undef CONV
+}
+
+// Return a sequence for getting a 1 from an IPM result when CC has a
+// value in CCMask and a 0 when CC has a value in CCValid & ~CCMask.
+// The handling of CC values outside CCValid doesn't matter.
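+// For example, with all CC values valid and CCMask = CCMASK_0 | CCMASK_2,
+// the result below is IPMConversion(-1, 0, SystemZ::IPM_CC): inverting
+// the low CC bit of the IPM result makes that bit 1 exactly for CC 0
+// and CC 2.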
+static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask) {
+ // Deal with cases where the result can be taken directly from a bit
+ // of the IPM result.
+ if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_3)))
+ return IPMConversion(0, 0, SystemZ::IPM_CC);
+ if (CCMask == (CCValid & (SystemZ::CCMASK_2 | SystemZ::CCMASK_3)))
+ return IPMConversion(0, 0, SystemZ::IPM_CC + 1);
+
+ // Deal with cases where we can add a value to force the sign bit
+ // to contain the right value. Putting the bit in 31 means we can
+ // use SRL rather than RISBG(L), and also makes it easier to get a
+ // 0/-1 value, so it has priority over the other tests below.
+ //
+ // These sequences rely on the fact that the upper two bits of the
+ // IPM result are zero.
+ uint64_t TopBit = uint64_t(1) << 31;
+ if (CCMask == (CCValid & SystemZ::CCMASK_0))
+ return IPMConversion(0, -(1 << SystemZ::IPM_CC), 31);
+ if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_1)))
+ return IPMConversion(0, -(2 << SystemZ::IPM_CC), 31);
+ if (CCMask == (CCValid & (SystemZ::CCMASK_0
+ | SystemZ::CCMASK_1
+ | SystemZ::CCMASK_2)))
+ return IPMConversion(0, -(3 << SystemZ::IPM_CC), 31);
+ if (CCMask == (CCValid & SystemZ::CCMASK_3))
+ return IPMConversion(0, TopBit - (3 << SystemZ::IPM_CC), 31);
+ if (CCMask == (CCValid & (SystemZ::CCMASK_1
+ | SystemZ::CCMASK_2
+ | SystemZ::CCMASK_3)))
+ return IPMConversion(0, TopBit - (1 << SystemZ::IPM_CC), 31);
+
+ // Next try inverting the value and testing a bit. 0/1 could be
+ // handled this way too, but we dealt with that case above.
+ if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_2)))
+ return IPMConversion(-1, 0, SystemZ::IPM_CC);
+
+ // Handle cases where adding a value forces a non-sign bit to contain
+ // the right value.
+ if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_2)))
+ return IPMConversion(0, 1 << SystemZ::IPM_CC, SystemZ::IPM_CC + 1);
+ if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_3)))
+ return IPMConversion(0, -(1 << SystemZ::IPM_CC), SystemZ::IPM_CC + 1);
+
+  // The remaining cases are 1, 2, 0/1/3 and 0/2/3. All of these can
+  // be handled by inverting the low CC bit and applying one of the
+  // sign-based extractions above.
+ if (CCMask == (CCValid & SystemZ::CCMASK_1))
+ return IPMConversion(1 << SystemZ::IPM_CC, -(1 << SystemZ::IPM_CC), 31);
+ if (CCMask == (CCValid & SystemZ::CCMASK_2))
+ return IPMConversion(1 << SystemZ::IPM_CC,
+ TopBit - (3 << SystemZ::IPM_CC), 31);
+ if (CCMask == (CCValid & (SystemZ::CCMASK_0
+ | SystemZ::CCMASK_1
+ | SystemZ::CCMASK_3)))
+ return IPMConversion(1 << SystemZ::IPM_CC, -(3 << SystemZ::IPM_CC), 31);
+ if (CCMask == (CCValid & (SystemZ::CCMASK_0
+ | SystemZ::CCMASK_2
+ | SystemZ::CCMASK_3)))
+ return IPMConversion(1 << SystemZ::IPM_CC,
+ TopBit - (1 << SystemZ::IPM_CC), 31);
+
+ llvm_unreachable("Unexpected CC combination");
+}
+
+// If C can be converted to a comparison against zero, adjust the operands
+// as necessary.
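+// For example, "X > -1" becomes "X >= 0" and "X < 1" becomes "X <= 0":
+// XORing CCMASK_CMP_EQ into the mask turns GT/LT into GE/LE.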
+static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
+ if (C.ICmpType == SystemZICMP::UnsignedOnly)
+ return;
+
+ auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
+ if (!ConstOp1)
+ return;
+
+ int64_t Value = ConstOp1->getSExtValue();
+ if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
+ (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
+ (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
+ (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
+ C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
+ C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
+ }
+}
+
+// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
+// adjust the operands as necessary.
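+// For example, an equality test of a zero-extending i8 load against a
+// constant below 256 can use CLI on the memory byte directly instead
+// of extending the value into a register first.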
+static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
+ Comparison &C) {
+  // For us to make any changes, it must be a comparison between a single-use
+ // load and a constant.
+ if (!C.Op0.hasOneUse() ||
+ C.Op0.getOpcode() != ISD::LOAD ||
+ C.Op1.getOpcode() != ISD::Constant)
+ return;
+
+ // We must have an 8- or 16-bit load.
+ auto *Load = cast<LoadSDNode>(C.Op0);
+ unsigned NumBits = Load->getMemoryVT().getStoreSizeInBits();
+ if (NumBits != 8 && NumBits != 16)
+ return;
+
+ // The load must be an extending one and the constant must be within the
+ // range of the unextended value.
+ auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
+ uint64_t Value = ConstOp1->getZExtValue();
+ uint64_t Mask = (1 << NumBits) - 1;
+ if (Load->getExtensionType() == ISD::SEXTLOAD) {
+ // Make sure that ConstOp1 is in range of C.Op0.
+ int64_t SignedValue = ConstOp1->getSExtValue();
+ if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
+ return;
+ if (C.ICmpType != SystemZICMP::SignedOnly) {
+ // Unsigned comparison between two sign-extended values is equivalent
+ // to unsigned comparison between two zero-extended values.
+ Value &= Mask;
+ } else if (NumBits == 8) {
+ // Try to treat the comparison as unsigned, so that we can use CLI.
+ // Adjust CCMask and Value as necessary.
+ if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
+ // Test whether the high bit of the byte is set.
+ Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
+ else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
+ // Test whether the high bit of the byte is clear.
+ Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
+ else
+ // No instruction exists for this combination.
+ return;
+ C.ICmpType = SystemZICMP::UnsignedOnly;
+ }
+ } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
+ if (Value > Mask)
+ return;
+ // If the constant is in range, we can use any comparison.
+ C.ICmpType = SystemZICMP::Any;
+ } else
+ return;
+
+ // Make sure that the first operand is an i32 of the right extension type.
+ ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
+ ISD::SEXTLOAD :
+ ISD::ZEXTLOAD);
+ if (C.Op0.getValueType() != MVT::i32 ||
+ Load->getExtensionType() != ExtType)
+ C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
+ Load->getBasePtr(), Load->getPointerInfo(),
+ Load->getMemoryVT(), Load->getAlignment(),
+ Load->getMemOperand()->getFlags());
+
+ // Make sure that the second operand is an i32 with the right value.
+ if (C.Op1.getValueType() != MVT::i32 ||
+ Value != ConstOp1->getZExtValue())
+ C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
+}
+
+// Return true if Op is either an unextended load or a load suitable
+// for integer register-memory comparisons of type ICmpType.
+static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
+ auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
+ if (Load) {
+ // There are no instructions to compare a register with a memory byte.
+ if (Load->getMemoryVT() == MVT::i8)
+ return false;
+ // Otherwise decide on extension type.
+ switch (Load->getExtensionType()) {
+ case ISD::NON_EXTLOAD:
+ return true;
+ case ISD::SEXTLOAD:
+ return ICmpType != SystemZICMP::UnsignedOnly;
+ case ISD::ZEXTLOAD:
+ return ICmpType != SystemZICMP::SignedOnly;
+ default:
+ break;
+ }
+ }
+ return false;
+}
+
+// Return true if it is better to swap the operands of C.
+static bool shouldSwapCmpOperands(const Comparison &C) {
+ // Leave f128 comparisons alone, since they have no memory forms.
+ if (C.Op0.getValueType() == MVT::f128)
+ return false;
+
+ // Always keep a floating-point constant second, since comparisons with
+ // zero can use LOAD TEST and comparisons with other constants make a
+ // natural memory operand.
+ if (isa<ConstantFPSDNode>(C.Op1))
+ return false;
+
+ // Never swap comparisons with zero since there are many ways to optimize
+ // those later.
+ auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
+ if (ConstOp1 && ConstOp1->getZExtValue() == 0)
+ return false;
+
+ // Also keep natural memory operands second if the loaded value is
+ // only used here. Several comparisons have memory forms.
+ if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
+ return false;
+
+ // Look for cases where C.Op0 is a single-use load and C.Op1 isn't.
+ // In that case we generally prefer the memory to be second.
+ if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
+ // The only exceptions are when the second operand is a constant and
+ // we can use things like CHHSI.
+ if (!ConstOp1)
+ return true;
+ // The unsigned memory-immediate instructions can handle 16-bit
+ // unsigned integers.
+ if (C.ICmpType != SystemZICMP::SignedOnly &&
+ isUInt<16>(ConstOp1->getZExtValue()))
+ return false;
+ // The signed memory-immediate instructions can handle 16-bit
+ // signed integers.
+ if (C.ICmpType != SystemZICMP::UnsignedOnly &&
+ isInt<16>(ConstOp1->getSExtValue()))
+ return false;
+ return true;
+ }
+
+ // Try to promote the use of CGFR and CLGFR.
+ unsigned Opcode0 = C.Op0.getOpcode();
+ if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
+ return true;
+ if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
+ return true;
+ if (C.ICmpType != SystemZICMP::SignedOnly &&
+ Opcode0 == ISD::AND &&
+ C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff)
+ return true;
+
+ return false;
+}
+
+// Return a version of comparison CC mask CCMask in which the LT and GT
+// actions are swapped.
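+// For example, swapping the operands of a compare turns CCMASK_CMP_LT
+// into CCMASK_CMP_GT and vice versa, while the EQ and UO bits pass
+// through unchanged.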
+static unsigned reverseCCMask(unsigned CCMask) {
+ return ((CCMask & SystemZ::CCMASK_CMP_EQ) |
+ (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
+ (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
+ (CCMask & SystemZ::CCMASK_CMP_UO));
+}
+
+// Check whether C tests for equality between X and Y and whether X - Y
+// or Y - X is also computed. In that case it's better to compare the
+// result of the subtraction against zero.
+static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
+ Comparison &C) {
+ if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
+ C.CCMask == SystemZ::CCMASK_CMP_NE) {
+ for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
+ SDNode *N = *I;
+ if (N->getOpcode() == ISD::SUB &&
+ ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
+ (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
+ C.Op0 = SDValue(N, 0);
+ C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
+ return;
+ }
+ }
+ }
+}
+
+// Check whether C compares a floating-point value with zero and whether
+// that floating-point value is also negated. In that case we can use the
+// negation to set CC, avoiding separate LOAD AND TEST and
+// LOAD (NEGATIVE/COMPLEMENT) instructions.
+static void adjustForFNeg(Comparison &C) {
+ auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
+ if (C1 && C1->isZero()) {
+ for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
+ SDNode *N = *I;
+ if (N->getOpcode() == ISD::FNEG) {
+ C.Op0 = SDValue(N, 0);
+ C.CCMask = reverseCCMask(C.CCMask);
+ return;
+ }
+ }
+ }
+}
+
+// Check whether C compares (shl X, 32) with 0 and whether X is
+// also sign-extended. In that case it is better to test the result
+// of the sign extension using LTGFR.
+//
+// This case is important because InstCombine transforms a comparison
+// with (sext (trunc X)) into a comparison with (shl X, 32).
+static void adjustForLTGFR(Comparison &C) {
+ // Check for a comparison between (shl X, 32) and 0.
+ if (C.Op0.getOpcode() == ISD::SHL &&
+ C.Op0.getValueType() == MVT::i64 &&
+ C.Op1.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
+ auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
+ if (C1 && C1->getZExtValue() == 32) {
+ SDValue ShlOp0 = C.Op0.getOperand(0);
+ // See whether X has any SIGN_EXTEND_INREG uses.
+ for (auto I = ShlOp0->use_begin(), E = ShlOp0->use_end(); I != E; ++I) {
+ SDNode *N = *I;
+ if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
+ C.Op0 = SDValue(N, 0);
+ return;
+ }
+ }
+ }
+ }
+}
+
+// If C compares the truncation of an extending load, try to compare
+// the untruncated value instead. This exposes more opportunities to
+// reuse CC.
+static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
+ Comparison &C) {
+ if (C.Op0.getOpcode() == ISD::TRUNCATE &&
+ C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
+ C.Op1.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
+ auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
+ if (L->getMemoryVT().getStoreSizeInBits() <= C.Op0.getValueSizeInBits()) {
+ unsigned Type = L->getExtensionType();
+ if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
+ (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
+ C.Op0 = C.Op0.getOperand(0);
+ C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
+ }
+ }
+ }
+}
+
+// Return true if shift operation N has an in-range constant shift value.
+// Store it in ShiftVal if so.
+static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
+ auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!Shift)
+ return false;
+
+ uint64_t Amount = Shift->getZExtValue();
+ if (Amount >= N.getValueSizeInBits())
+ return false;
+
+ ShiftVal = Amount;
+ return true;
+}
+
+// Check whether an AND with Mask is suitable for a TEST UNDER MASK
+// instruction and whether the CC value is descriptive enough to handle
+// a comparison of type Opcode between the AND result and CmpVal.
+// CCMask says which comparison result is being tested and BitSize is
+// the number of bits in the operands. If TEST UNDER MASK can be used,
+// return the corresponding CC mask, otherwise return 0.
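+//
+// A worked example (illustrative): for Mask == 0xc0, Low == 0x40 and
+// High == 0x80, so an unsigned "(x & 0xc0) < 0x40" holds exactly when
+// both masked bits are clear and maps to CCMASK_TM_ALL_0.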
+static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
+ uint64_t Mask, uint64_t CmpVal,
+ unsigned ICmpType) {
+ assert(Mask != 0 && "ANDs with zero should have been removed by now");
+
+ // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
+ if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
+ !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
+ return 0;
+
+ // Work out the masks for the lowest and highest bits.
+ unsigned HighShift = 63 - countLeadingZeros(Mask);
+ uint64_t High = uint64_t(1) << HighShift;
+ uint64_t Low = uint64_t(1) << countTrailingZeros(Mask);
+
+ // Signed ordered comparisons are effectively unsigned if the sign
+ // bit is dropped.
+ bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
+
+ // Check for equality comparisons with 0, or the equivalent.
+ if (CmpVal == 0) {
+ if (CCMask == SystemZ::CCMASK_CMP_EQ)
+ return SystemZ::CCMASK_TM_ALL_0;
+ if (CCMask == SystemZ::CCMASK_CMP_NE)
+ return SystemZ::CCMASK_TM_SOME_1;
+ }
+ if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
+ if (CCMask == SystemZ::CCMASK_CMP_LT)
+ return SystemZ::CCMASK_TM_ALL_0;
+ if (CCMask == SystemZ::CCMASK_CMP_GE)
+ return SystemZ::CCMASK_TM_SOME_1;
+ }
+ if (EffectivelyUnsigned && CmpVal < Low) {
+ if (CCMask == SystemZ::CCMASK_CMP_LE)
+ return SystemZ::CCMASK_TM_ALL_0;
+ if (CCMask == SystemZ::CCMASK_CMP_GT)
+ return SystemZ::CCMASK_TM_SOME_1;
+ }
+
+ // Check for equality comparisons with the mask, or the equivalent.
+ if (CmpVal == Mask) {
+ if (CCMask == SystemZ::CCMASK_CMP_EQ)
+ return SystemZ::CCMASK_TM_ALL_1;
+ if (CCMask == SystemZ::CCMASK_CMP_NE)
+ return SystemZ::CCMASK_TM_SOME_0;
+ }
+ if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
+ if (CCMask == SystemZ::CCMASK_CMP_GT)
+ return SystemZ::CCMASK_TM_ALL_1;
+ if (CCMask == SystemZ::CCMASK_CMP_LE)
+ return SystemZ::CCMASK_TM_SOME_0;
+ }
+ if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
+ if (CCMask == SystemZ::CCMASK_CMP_GE)
+ return SystemZ::CCMASK_TM_ALL_1;
+ if (CCMask == SystemZ::CCMASK_CMP_LT)
+ return SystemZ::CCMASK_TM_SOME_0;
+ }
+
+ // Check for ordered comparisons with the top bit.
+ if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
+ if (CCMask == SystemZ::CCMASK_CMP_LE)
+ return SystemZ::CCMASK_TM_MSB_0;
+ if (CCMask == SystemZ::CCMASK_CMP_GT)
+ return SystemZ::CCMASK_TM_MSB_1;
+ }
+ if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
+ if (CCMask == SystemZ::CCMASK_CMP_LT)
+ return SystemZ::CCMASK_TM_MSB_0;
+ if (CCMask == SystemZ::CCMASK_CMP_GE)
+ return SystemZ::CCMASK_TM_MSB_1;
+ }
+
+ // If there are just two bits, we can do equality checks for Low and High
+ // as well.
+ if (Mask == Low + High) {
+ if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
+ return SystemZ::CCMASK_TM_MIXED_MSB_0;
+ if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
+ return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
+ if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
+ return SystemZ::CCMASK_TM_MIXED_MSB_1;
+ if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
+ return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
+ }
+
+ // Looks like we've exhausted our options.
+ return 0;
+}
+
+// See whether C can be implemented as a TEST UNDER MASK instruction.
+// Update the arguments with the TM version if so.
+static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
+ Comparison &C) {
+ // Check that we have a comparison with a constant.
+ auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
+ if (!ConstOp1)
+ return;
+ uint64_t CmpVal = ConstOp1->getZExtValue();
+
+ // Check whether the nonconstant input is an AND with a constant mask.
+ Comparison NewC(C);
+ uint64_t MaskVal;
+ ConstantSDNode *Mask = nullptr;
+ if (C.Op0.getOpcode() == ISD::AND) {
+ NewC.Op0 = C.Op0.getOperand(0);
+ NewC.Op1 = C.Op0.getOperand(1);
+ Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
+ if (!Mask)
+ return;
+ MaskVal = Mask->getZExtValue();
+ } else {
+ // There is no instruction to compare with a 64-bit immediate,
+ // so use TMHH instead if possible. We need an unsigned ordered
+ // comparison with an i64 immediate.
+ if (NewC.Op0.getValueType() != MVT::i64 ||
+ NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
+ NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
+ NewC.ICmpType == SystemZICMP::SignedOnly)
+ return;
+ // Convert LE and GT comparisons into LT and GE.
+ if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
+ NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
+ if (CmpVal == uint64_t(-1))
+ return;
+ CmpVal += 1;
+ NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
+ }
+ // If the low N bits of Op1 are zero, then the low N bits of Op0 can
+ // be masked off without changing the result.
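+ // (CmpVal & -CmpVal) isolates the lowest set bit of CmpVal, so the
+ // negation keeps that bit and everything above it; e.g. (illustrative)
+ // CmpVal == 0x500 gives MaskVal == 0xffffffffffffff00.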
+ MaskVal = -(CmpVal & -CmpVal);
+ NewC.ICmpType = SystemZICMP::UnsignedOnly;
+ }
+ if (!MaskVal)
+ return;
+
+ // Check whether the combination of mask, comparison value and comparison
+ // type are suitable.
+ unsigned BitSize = NewC.Op0.getValueSizeInBits();
+ unsigned NewCCMask, ShiftVal;
+ if (NewC.ICmpType != SystemZICMP::SignedOnly &&
+ NewC.Op0.getOpcode() == ISD::SHL &&
+ isSimpleShift(NewC.Op0, ShiftVal) &&
+ (MaskVal >> ShiftVal != 0) &&
+ (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
+ MaskVal >> ShiftVal,
+ CmpVal >> ShiftVal,
+ SystemZICMP::Any))) {
+ NewC.Op0 = NewC.Op0.getOperand(0);
+ MaskVal >>= ShiftVal;
+ } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
+ NewC.Op0.getOpcode() == ISD::SRL &&
+ isSimpleShift(NewC.Op0, ShiftVal) &&
+ (MaskVal << ShiftVal != 0) &&
+ (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
+ MaskVal << ShiftVal,
+ CmpVal << ShiftVal,
+ SystemZICMP::UnsignedOnly))) {
+ NewC.Op0 = NewC.Op0.getOperand(0);
+ MaskVal <<= ShiftVal;
+ } else {
+ NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
+ NewC.ICmpType);
+ if (!NewCCMask)
+ return;
+ }
+
+ // Go ahead and make the change.
+ C.Opcode = SystemZISD::TM;
+ C.Op0 = NewC.Op0;
+ if (Mask && Mask->getZExtValue() == MaskVal)
+ C.Op1 = SDValue(Mask, 0);
+ else
+ C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
+ C.CCValid = SystemZ::CCMASK_TM;
+ C.CCMask = NewCCMask;
+}
+
+// Return a Comparison that tests the condition-code result of intrinsic
+// node Call against constant integer CC using comparison code Cond.
+// Opcode is the opcode of the SystemZISD operation for the intrinsic
+// and CCValid is the set of possible condition-code results.
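+//
+// For example (illustrative): testing the intrinsic result for CC == 1
+// with SETEQ yields CCMask == 1 << (3 - 1) == 4, the single mask bit that
+// selects condition code 1, before the final mask with CCValid.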
+static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
+ SDValue Call, unsigned CCValid, uint64_t CC,
+ ISD::CondCode Cond) {
+ Comparison C(Call, SDValue());
+ C.Opcode = Opcode;
+ C.CCValid = CCValid;
+ if (Cond == ISD::SETEQ)
+ // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
+ C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
+ else if (Cond == ISD::SETNE)
+ // ...and the inverse of that.
+ C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
+ else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
+ // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
+ // always true for CC>3.
+ C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
+ else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
+ // ...and the inverse of that.
+ C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
+ else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
+ // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
+ // always true for CC>3.
+ C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
+ else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
+ // ...and the inverse of that.
+ C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
+ else
+ llvm_unreachable("Unexpected integer comparison type");
+ C.CCMask &= CCValid;
+ return C;
+}
+
+// Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
+static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
+ ISD::CondCode Cond, const SDLoc &DL) {
+ if (CmpOp1.getOpcode() == ISD::Constant) {
+ uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
+ unsigned Opcode, CCValid;
+ if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
+ CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
+ isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
+ return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
+ if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+ CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
+ isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
+ return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
+ }
+ Comparison C(CmpOp0, CmpOp1);
+ C.CCMask = CCMaskForCondCode(Cond);
+ if (C.Op0.getValueType().isFloatingPoint()) {
+ C.CCValid = SystemZ::CCMASK_FCMP;
+ C.Opcode = SystemZISD::FCMP;
+ adjustForFNeg(C);
+ } else {
+ C.CCValid = SystemZ::CCMASK_ICMP;
+ C.Opcode = SystemZISD::ICMP;
+ // Choose the type of comparison. Equality and inequality tests can
+ // use either signed or unsigned comparisons. The choice also doesn't
+ // matter if both sign bits are known to be clear. In those cases we
+ // want to give the main isel code the freedom to choose whichever
+ // form fits best.
+ if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
+ C.CCMask == SystemZ::CCMASK_CMP_NE ||
+ (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
+ C.ICmpType = SystemZICMP::Any;
+ else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
+ C.ICmpType = SystemZICMP::UnsignedOnly;
+ else
+ C.ICmpType = SystemZICMP::SignedOnly;
+ C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
+ adjustZeroCmp(DAG, DL, C);
+ adjustSubwordCmp(DAG, DL, C);
+ adjustForSubtraction(DAG, DL, C);
+ adjustForLTGFR(C);
+ adjustICmpTruncate(DAG, DL, C);
+ }
+
+ if (shouldSwapCmpOperands(C)) {
+ std::swap(C.Op0, C.Op1);
+ C.CCMask = reverseCCMask(C.CCMask);
+ }
+
+ adjustForTestUnderMask(DAG, DL, C);
+ return C;
+}
+
+// Emit the comparison instruction described by C.
+static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
+ if (!C.Op1.getNode()) {
+ SDValue Op;
+ switch (C.Op0.getOpcode()) {
+ case ISD::INTRINSIC_W_CHAIN:
+ Op = emitIntrinsicWithChainAndGlue(DAG, C.Op0, C.Opcode);
+ break;
+ case ISD::INTRINSIC_WO_CHAIN:
+ Op = emitIntrinsicWithGlue(DAG, C.Op0, C.Opcode);
+ break;
+ default:
+ llvm_unreachable("Invalid comparison operands");
+ }
+ return SDValue(Op.getNode(), Op->getNumValues() - 1);
+ }
+ if (C.Opcode == SystemZISD::ICMP)
+ return DAG.getNode(SystemZISD::ICMP, DL, MVT::Glue, C.Op0, C.Op1,
+ DAG.getConstant(C.ICmpType, DL, MVT::i32));
+ if (C.Opcode == SystemZISD::TM) {
+ bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
+ bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
+ return DAG.getNode(SystemZISD::TM, DL, MVT::Glue, C.Op0, C.Op1,
+ DAG.getConstant(RegisterOnly, DL, MVT::i32));
+ }
+ return DAG.getNode(C.Opcode, DL, MVT::Glue, C.Op0, C.Op1);
+}
+
+// Implement a 32-bit *MUL_LOHI operation by extending both operands to
+// 64 bits. Extend is the extension type to use. Store the high part
+// in Hi and the low part in Lo.
+static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
+ SDValue Op0, SDValue Op1, SDValue &Hi,
+ SDValue &Lo) {
+ Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
+ Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
+ Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
+ DAG.getConstant(32, DL, MVT::i64));
+ Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
+ Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
+}
+
+// Lower a binary operation that produces two VT results, one in each
+// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
+// Extend extends Op0 to a GR128, and Opcode performs the GR128 operation
+// on the extended Op0 and (unextended) Op1. Store the even register result
+// in Even and the odd register result in Odd.
+static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
+ unsigned Extend, unsigned Opcode, SDValue Op0,
+ SDValue Op1, SDValue &Even, SDValue &Odd) {
+ SDNode *In128 = DAG.getMachineNode(Extend, DL, MVT::Untyped, Op0);
+ SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped,
+ SDValue(In128, 0), Op1);
+ bool Is32Bit = is32Bit(VT);
+ Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
+ Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
+}
+
+// Return an i32 value that is 1 if the CC value produced by Glue is
+// in the mask CCMask and 0 otherwise. CC is known to have a value
+// in CCValid, so other values can be ignored.
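+//
+// IPM places the CC value in bits 28-29 of its result (i.e. CC << 28);
+// the optional XOR/ADD below rearrange the CC values so that a single
+// result bit distinguishes the two outcomes, which the final shift
+// (plus an AND when that bit is not the MSB) extracts as 0 or 1.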
+static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue Glue,
+ unsigned CCValid, unsigned CCMask) {
+ IPMConversion Conversion = getIPMConversion(CCValid, CCMask);
+ SDValue Result = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
+
+ if (Conversion.XORValue)
+ Result = DAG.getNode(ISD::XOR, DL, MVT::i32, Result,
+ DAG.getConstant(Conversion.XORValue, DL, MVT::i32));
+
+ if (Conversion.AddValue)
+ Result = DAG.getNode(ISD::ADD, DL, MVT::i32, Result,
+ DAG.getConstant(Conversion.AddValue, DL, MVT::i32));
+
+ // The SHR/AND sequence should get optimized to an RISBG.
+ Result = DAG.getNode(ISD::SRL, DL, MVT::i32, Result,
+ DAG.getConstant(Conversion.Bit, DL, MVT::i32));
+ if (Conversion.Bit != 31)
+ Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result,
+ DAG.getConstant(1, DL, MVT::i32));
+ return Result;
+}
+
+// Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
+// be done directly. IsFP is true if CC is for a floating-point rather than
+// integer comparison.
+static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP) {
+ switch (CC) {
+ case ISD::SETOEQ:
+ case ISD::SETEQ:
+ return IsFP ? SystemZISD::VFCMPE : SystemZISD::VICMPE;
+
+ case ISD::SETOGE:
+ case ISD::SETGE:
+ return IsFP ? SystemZISD::VFCMPHE : static_cast<SystemZISD::NodeType>(0);
+
+ case ISD::SETOGT:
+ case ISD::SETGT:
+ return IsFP ? SystemZISD::VFCMPH : SystemZISD::VICMPH;
+
+ case ISD::SETUGT:
+ return IsFP ? static_cast<SystemZISD::NodeType>(0) : SystemZISD::VICMPHL;
+
+ default:
+ return 0;
+ }
+}
+
+// Return the SystemZISD vector comparison operation for CC or its inverse,
+// or 0 if neither can be done directly. Indicate in Invert whether the
+// result is for the inverse of CC. IsFP is true if CC is for a
+// floating-point rather than integer comparison.
+static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP,
+ bool &Invert) {
+ if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
+ Invert = false;
+ return Opcode;
+ }
+
+ CC = ISD::getSetCCInverse(CC, !IsFP);
+ if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
+ Invert = true;
+ return Opcode;
+ }
+
+ return 0;
+}
+
+// Return a v2f64 that contains the extended form of elements Start and Start+1
+// of v4f32 value Op.
+static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
+ SDValue Op) {
+ int Mask[] = { Start, -1, Start + 1, -1 };
+ Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
+ return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
+}
+
+// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
+// producing a result of type VT.
+static SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &DL,
+ EVT VT, SDValue CmpOp0, SDValue CmpOp1) {
+ // There is no hardware support for v4f32, so extend the vector into
+ // two v2f64s and compare those.
+ if (CmpOp0.getValueType() == MVT::v4f32) {
+ SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0);
+ SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0);
+ SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1);
+ SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1);
+ SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
+ SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
+ return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
+ }
+ return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
+}
+
+// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
+// an integer mask of type VT.
+static SDValue lowerVectorSETCC(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
+ ISD::CondCode CC, SDValue CmpOp0,
+ SDValue CmpOp1) {
+ bool IsFP = CmpOp0.getValueType().isFloatingPoint();
+ bool Invert = false;
+ SDValue Cmp;
+ switch (CC) {
+ // Handle tests for order using (or (ogt y x) (oge x y)).
+ case ISD::SETUO:
+ Invert = true;
+ LLVM_FALLTHROUGH;
+ case ISD::SETO: {
+ assert(IsFP && "Unexpected integer comparison");
+ SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
+ SDValue GE = getVectorCmp(DAG, SystemZISD::VFCMPHE, DL, VT, CmpOp0, CmpOp1);
+ Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
+ break;
+ }
+
+ // Handle <> tests using (or (ogt y x) (ogt x y)).
+ case ISD::SETUEQ:
+ Invert = true;
+ LLVM_FALLTHROUGH;
+ case ISD::SETONE: {
+ assert(IsFP && "Unexpected integer comparison");
+ SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
+ SDValue GT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp0, CmpOp1);
+ Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
+ break;
+ }
+
+ // Otherwise a single comparison is enough. It doesn't really
+ // matter whether we try the inversion or the swap first, since
+ // there are no cases where both work.
+ default:
+ if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
+ Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1);
+ else {
+ CC = ISD::getSetCCSwappedOperands(CC);
+ if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
+ Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0);
+ else
+ llvm_unreachable("Unhandled comparison");
+ }
+ break;
+ }
+ if (Invert) {
+ SDValue Mask = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
+ DAG.getConstant(65535, DL, MVT::i32));
+ Mask = DAG.getNode(ISD::BITCAST, DL, VT, Mask);
+ Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
+ }
+ return Cmp;
+}
+
+SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue CmpOp0 = Op.getOperand(0);
+ SDValue CmpOp1 = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ if (VT.isVector())
+ return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
+
+ Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
+ SDValue Glue = emitCmp(DAG, DL, C);
+ return emitSETCC(DAG, DL, Glue, C.CCValid, C.CCMask);
+}
+
+SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue CmpOp0 = Op.getOperand(2);
+ SDValue CmpOp1 = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
+ SDLoc DL(Op);
+
+ Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
+ SDValue Glue = emitCmp(DAG, DL, C);
+ return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(),
+ Op.getOperand(0), DAG.getConstant(C.CCValid, DL, MVT::i32),
+ DAG.getConstant(C.CCMask, DL, MVT::i32), Dest, Glue);
+}
+
+// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
+// allowing Pos and Neg to be wider than CmpOp.
+static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
+ return (Neg.getOpcode() == ISD::SUB &&
+ Neg.getOperand(0).getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(Neg.getOperand(0))->getZExtValue() == 0 &&
+ Neg.getOperand(1) == Pos &&
+ (Pos == CmpOp ||
+ (Pos.getOpcode() == ISD::SIGN_EXTEND &&
+ Pos.getOperand(0) == CmpOp)));
+}
+
+// Return the absolute or negative absolute of Op; IsNegative decides which.
+static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
+ bool IsNegative) {
+ Op = DAG.getNode(SystemZISD::IABS, DL, Op.getValueType(), Op);
+ if (IsNegative)
+ Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
+ DAG.getConstant(0, DL, Op.getValueType()), Op);
+ return Op;
+}
+
+SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue CmpOp0 = Op.getOperand(0);
+ SDValue CmpOp1 = Op.getOperand(1);
+ SDValue TrueOp = Op.getOperand(2);
+ SDValue FalseOp = Op.getOperand(3);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ SDLoc DL(Op);
+
+ Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
+
+ // Check for absolute and negative-absolute selections, including those
+ // where the comparison value is sign-extended (for LPGFR and LNGFR).
+ // This check supplements the one in DAGCombiner.
+ if (C.Opcode == SystemZISD::ICMP &&
+ C.CCMask != SystemZ::CCMASK_CMP_EQ &&
+ C.CCMask != SystemZ::CCMASK_CMP_NE &&
+ C.Op1.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
+ if (isAbsolute(C.Op0, TrueOp, FalseOp))
+ return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
+ if (isAbsolute(C.Op0, FalseOp, TrueOp))
+ return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
+ }
+
+ SDValue Glue = emitCmp(DAG, DL, C);
+
+ // Special case for handling -1/0 results. The shifts we use here
+ // should get optimized with the IPM conversion sequence.
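+ // The SETCC result below is 0 or 1; shifting it up to the sign bit and
+ // arithmetically back down turns that into the required 0 or -1.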
+ auto *TrueC = dyn_cast<ConstantSDNode>(TrueOp);
+ auto *FalseC = dyn_cast<ConstantSDNode>(FalseOp);
+ if (TrueC && FalseC) {
+ int64_t TrueVal = TrueC->getSExtValue();
+ int64_t FalseVal = FalseC->getSExtValue();
+ if ((TrueVal == -1 && FalseVal == 0) || (TrueVal == 0 && FalseVal == -1)) {
+ // Invert the condition if we want -1 on false.
+ if (TrueVal == 0)
+ C.CCMask ^= C.CCValid;
+ SDValue Result = emitSETCC(DAG, DL, Glue, C.CCValid, C.CCMask);
+ EVT VT = Op.getValueType();
+ // Extend the result to VT. Upper bits are ignored.
+ if (!is32Bit(VT))
+ Result = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Result);
+ // Sign-extend from the low bit.
+ SDValue ShAmt = DAG.getConstant(VT.getSizeInBits() - 1, DL, MVT::i32);
+ SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, Result, ShAmt);
+ return DAG.getNode(ISD::SRA, DL, VT, Shl, ShAmt);
+ }
+ }
+
+ SDValue Ops[] = {TrueOp, FalseOp, DAG.getConstant(C.CCValid, DL, MVT::i32),
+ DAG.getConstant(C.CCMask, DL, MVT::i32), Glue};
+
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
+ return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, Ops);
+}
+
+SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Node);
+ const GlobalValue *GV = Node->getGlobal();
+ int64_t Offset = Node->getOffset();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ CodeModel::Model CM = DAG.getTarget().getCodeModel();
+
+ SDValue Result;
+ if (Subtarget.isPC32DBLSymbol(GV, CM)) {
+ // Assign anchors at 1<<12 byte boundaries.
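+ // For example (illustrative): Offset == 0x1234 gives Anchor == 0x1000;
+ // the remaining 0x234 is halfword-aligned and is folded below.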
+ uint64_t Anchor = Offset & ~uint64_t(0xfff);
+ Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
+ Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
+
+ // The offset can be folded into the address if it is aligned to a halfword.
+ Offset -= Anchor;
+ if (Offset != 0 && (Offset & 1) == 0) {
+ SDValue Full = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
+ Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
+ Offset = 0;
+ }
+ } else {
+ Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
+ Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
+ Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()));
+ }
+
+ // If there was a non-zero offset that we didn't fold, create an explicit
+ // addition for it.
+ if (Offset != 0)
+ Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
+ DAG.getConstant(Offset, DL, PtrVT));
+
+ return Result;
+}
+
+SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
+ SelectionDAG &DAG,
+ unsigned Opcode,
+ SDValue GOTOffset) const {
+ SDLoc DL(Node);
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ SDValue Chain = DAG.getEntryNode();
+ SDValue Glue;
+
+ // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
+ SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
+ Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
+ Glue = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
+ Glue = Chain.getValue(1);
+
+ // The first call operand is the chain and the second is the TLS symbol.
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
+ Node->getValueType(0),
+ 0, 0));
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
+ Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
+
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ const uint32_t *Mask =
+ TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
+ // Glue the call to the argument copies.
+ Ops.push_back(Glue);
+
+ // Emit the call.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
+ Glue = Chain.getValue(1);
+
+ // Copy the return value from %r2.
+ return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
+}
+
+SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
+ SelectionDAG &DAG) const {
+ SDValue Chain = DAG.getEntryNode();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+
+ // The high part of the thread pointer is in access register 0.
+ SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
+ TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
+
+ // The low part of the thread pointer is in access register 1.
+ SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
+ TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
+
+ // Merge them into a single 64-bit address.
+ SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
+ DAG.getConstant(32, DL, PtrVT));
+ return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
+}
+
+SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
+ SelectionDAG &DAG) const {
+ if (DAG.getTarget().Options.EmulatedTLS)
+ return LowerToTLSEmulatedModel(Node, DAG);
+ SDLoc DL(Node);
+ const GlobalValue *GV = Node->getGlobal();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
+
+ SDValue TP = lowerThreadPointer(DL, DAG);
+
+ // Get the offset of GA from the thread pointer, based on the TLS model.
+ SDValue Offset;
+ switch (model) {
+ case TLSModel::GeneralDynamic: {
+ // Load the GOT offset of the tls_index (module ID / per-symbol offset).
+ SystemZConstantPoolValue *CPV =
+ SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
+
+ Offset = DAG.getConstantPool(CPV, PtrVT, 8);
+ Offset = DAG.getLoad(
+ PtrVT, DL, DAG.getEntryNode(), Offset,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
+
+ // Call __tls_get_offset to retrieve the offset.
+ Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
+ break;
+ }
+
+ case TLSModel::LocalDynamic: {
+ // Load the GOT offset of the module ID.
+ SystemZConstantPoolValue *CPV =
+ SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
+
+ Offset = DAG.getConstantPool(CPV, PtrVT, 8);
+ Offset = DAG.getLoad(
+ PtrVT, DL, DAG.getEntryNode(), Offset,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
+
+ // Call __tls_get_offset to retrieve the module base offset.
+ Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
+
+ // Note: The SystemZLDCleanupPass will remove redundant computations
+ // of the module base offset. Count total number of local-dynamic
+ // accesses to trigger execution of that pass.
+ SystemZMachineFunctionInfo *MFI =
+ DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
+ MFI->incNumLocalDynamicTLSAccesses();
+
+ // Add the per-symbol offset.
+ CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
+
+ SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8);
+ DTPOffset = DAG.getLoad(
+ PtrVT, DL, DAG.getEntryNode(), DTPOffset,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
+
+ Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
+ break;
+ }
+
+ case TLSModel::InitialExec: {
+ // Load the offset from the GOT.
+ Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
+ SystemZII::MO_INDNTPOFF);
+ Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
+ Offset =
+ DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()));
+ break;
+ }
+
+ case TLSModel::LocalExec: {
+ // Force the offset into the constant pool and load it from there.
+ SystemZConstantPoolValue *CPV =
+ SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
+
+ Offset = DAG.getConstantPool(CPV, PtrVT, 8);
+ Offset = DAG.getLoad(
+ PtrVT, DL, DAG.getEntryNode(), Offset,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
+ break;
+ }
+ }
+
+ // Add the base and offset together.
+ return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
+}
+
+SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Node);
+ const BlockAddress *BA = Node->getBlockAddress();
+ int64_t Offset = Node->getOffset();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+
+ SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
+ Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
+ return Result;
+}
+
+SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
+ SelectionDAG &DAG) const {
+ SDLoc DL(JT);
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
+
+ // Use LARL to load the address of the table.
+ return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
+}
+
+SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
+ SelectionDAG &DAG) const {
+ SDLoc DL(CP);
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+
+ SDValue Result;
+ if (CP->isMachineConstantPoolEntry())
+ Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
+ CP->getAlignment());
+ else
+ Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
+ CP->getAlignment(), CP->getOffset());
+
+ // Use LARL to load the address of the constant pool entry.
+ return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
+}
+
+SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ MFI.setFrameAddressIsTaken(true);
+
+ SDLoc DL(Op);
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+
+ // If the back chain frame index has not been allocated yet, do so.
+ SystemZMachineFunctionInfo *FI = MF.getInfo<SystemZMachineFunctionInfo>();
+ int BackChainIdx = FI->getFramePointerSaveIndex();
+ if (!BackChainIdx) {
+ // By definition, the frame address is the address of the back chain.
+ BackChainIdx = MFI.CreateFixedObject(8, -SystemZMC::CallFrameSize, false);
+ FI->setFramePointerSaveIndex(BackChainIdx);
+ }
+ SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
+
+ // FIXME: The frontend should detect this case.
+ if (Depth > 0) {
+ report_fatal_error("Unsupported stack frame traversal count");
+ }
+
+ return BackChain;
+}
+
+SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ MFI.setReturnAddressIsTaken(true);
+
+ if (verifyReturnAddressArgumentIsConstant(Op, DAG))
+ return SDValue();
+
+ SDLoc DL(Op);
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+
+ // FIXME: The frontend should detect this case.
+ if (Depth > 0) {
+ report_fatal_error("Unsupported stack frame traversal count");
+ }
+
+ // Return R14D, which has the return address. Mark it an implicit live-in.
+ unsigned LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass);
+ return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
+}
+
+SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ SDValue In = Op.getOperand(0);
+ EVT InVT = In.getValueType();
+ EVT ResVT = Op.getValueType();
+
+ // Convert loads directly. This is normally done by DAGCombiner,
+ // but we need this case for bitcasts that are created during lowering
+ // and which are then lowered themselves.
+ if (auto *LoadN = dyn_cast<LoadSDNode>(In))
+ if (ISD::isNormalLoad(LoadN))
+ return DAG.getLoad(ResVT, DL, LoadN->getChain(), LoadN->getBasePtr(),
+ LoadN->getMemOperand());
+
+ if (InVT == MVT::i32 && ResVT == MVT::f32) {
+ SDValue In64;
+ if (Subtarget.hasHighWord()) {
+ SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
+ MVT::i64);
+ In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
+ MVT::i64, SDValue(U64, 0), In);
+ } else {
+ In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
+ In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
+ DAG.getConstant(32, DL, MVT::i64));
+ }
+ SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
+ return DAG.getTargetExtractSubreg(SystemZ::subreg_r32,
+ DL, MVT::f32, Out64);
+ }
+ if (InVT == MVT::f32 && ResVT == MVT::i32) {
+ SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
+ SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_r32, DL,
+ MVT::f64, SDValue(U64, 0), In);
+ SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
+ if (Subtarget.hasHighWord())
+ return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
+ MVT::i32, Out64);
+ SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
+ DAG.getConstant(32, DL, MVT::i64));
+ return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
+ }
+ llvm_unreachable("Unexpected bitcast combination");
+}
+
+SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ SystemZMachineFunctionInfo *FuncInfo =
+ MF.getInfo<SystemZMachineFunctionInfo>();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+
+ SDValue Chain = Op.getOperand(0);
+ SDValue Addr = Op.getOperand(1);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ SDLoc DL(Op);
+
+ // The initial values of each field.
+ const unsigned NumFields = 4;
+ SDValue Fields[NumFields] = {
+ DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
+ DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
+ DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
+ DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
+ };
+
+ // Store each field into its respective slot.
+ SDValue MemOps[NumFields];
+ unsigned Offset = 0;
+ for (unsigned I = 0; I < NumFields; ++I) {
+ SDValue FieldAddr = Addr;
+ if (Offset != 0)
+ FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
+ DAG.getIntPtrConstant(Offset, DL));
+ MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
+ MachinePointerInfo(SV, Offset));
+ Offset += 8;
+ }
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
+}
+
+SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue DstPtr = Op.getOperand(1);
+ SDValue SrcPtr = Op.getOperand(2);
+ const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
+ const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+ SDLoc DL(Op);
+
+ return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL),
+ /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false,
+ /*isTailCall*/false,
+ MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
+}
+
+SDValue SystemZTargetLowering::
+lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
+ const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool RealignOpt = !MF.getFunction()->hasFnAttribute("no-realign-stack");
+ bool StoreBackchain = MF.getFunction()->hasFnAttribute("backchain");
+
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ SDValue Align = Op.getOperand(2);
+ SDLoc DL(Op);
+
+ // If the user has set the "no-realign-stack" function attribute, ignore
+ // alloca alignments.
+ uint64_t AlignVal = (RealignOpt ?
+ cast<ConstantSDNode>(Align)->getZExtValue() : 0);
+
+ uint64_t StackAlign = TFI->getStackAlignment();
+ uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
+ uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
+
+ unsigned SPReg = getStackPointerRegisterToSaveRestore();
+ SDValue NeededSpace = Size;
+
+ // Get a reference to the stack pointer.
+ SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
+
+ // If we need a backchain, save it now.
+ SDValue Backchain;
+ if (StoreBackchain)
+ Backchain = DAG.getLoad(MVT::i64, DL, Chain, OldSP, MachinePointerInfo());
+
+ // Add extra space for alignment if needed.
+ if (ExtraAlignSpace)
+ NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
+ DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
+
+ // Get the new stack pointer value.
+ SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
+
+ // Copy the new stack pointer back.
+ Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
+
+ // The allocated data lives above the 160 bytes allocated for the standard
+ // frame, plus any outgoing stack arguments. We don't know how much that
+ // amounts to yet, so emit a special ADJDYNALLOC placeholder.
+ SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
+ SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
+
+ // Dynamically realign if needed.
+ if (RequiredAlign > StackAlign) {
+ Result =
+ DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
+ DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
+ Result =
+ DAG.getNode(ISD::AND, DL, MVT::i64, Result,
+ DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
+ }
+
+ if (StoreBackchain)
+ Chain = DAG.getStore(Chain, DL, Backchain, NewSP, MachinePointerInfo());
+
+ SDValue Ops[2] = { Result, Chain };
+ return DAG.getMergeValues(Ops, DL);
+}
+
+SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
+ SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+
+ return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
+}
+
+SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+ SDValue Ops[2];
+ if (is32Bit(VT))
+ // Just do a normal 64-bit multiplication and extract the results.
+ // We define this so that it can be used for constant division.
+ lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
+ Op.getOperand(1), Ops[1], Ops[0]);
+ else {
+ // Do a full 128-bit multiplication based on UMUL_LOHI64:
+ //
+ // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
+ //
+ // but using the fact that the upper halves are either all zeros
+ // or all ones:
+ //
+ // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
+ //
+ // and grouping the right terms together since they are quicker than the
+ // multiplication:
+ //
+ // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
+ SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
+ SDValue LL = Op.getOperand(0);
+ SDValue RL = Op.getOperand(1);
+ SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
+ SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
+ // UMUL_LOHI64 returns the low result in the odd register and the high
+ // result in the even register. SMUL_LOHI is defined to return the
+ // low half first, so the results are in reverse order.
+ lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64,
+ LL, RL, Ops[1], Ops[0]);
+ SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
+ SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
+ SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
+ Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
+ }
+ return DAG.getMergeValues(Ops, DL);
+}
+
+SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+ SDValue Ops[2];
+ if (is32Bit(VT))
+ // Just do a normal 64-bit multiplication and extract the results.
+ // We define this so that it can be used for constant division.
+ lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
+ Op.getOperand(1), Ops[1], Ops[0]);
+ else
+ // UMUL_LOHI64 returns the low result in the odd register and the high
+ // result in the even register. UMUL_LOHI is defined to return the
+ // low half first, so the results are in reverse order.
+ lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64,
+ Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
+ return DAG.getMergeValues(Ops, DL);
+}
+
+SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+ unsigned Opcode;
+
+ // We use DSGF for 32-bit division.
+ if (is32Bit(VT)) {
+ Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
+ Opcode = SystemZISD::SDIVREM32;
+ } else if (DAG.ComputeNumSignBits(Op1) > 32) {
+ Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
+ Opcode = SystemZISD::SDIVREM32;
+ } else
+ Opcode = SystemZISD::SDIVREM64;
+
+ // DSG(F) takes a 64-bit dividend, so the even register in the GR128
+ // input is "don't care". The instruction returns the remainder in
+ // the even register and the quotient in the odd register.
+ SDValue Ops[2];
+ lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, Opcode,
+ Op0, Op1, Ops[1], Ops[0]);
+ return DAG.getMergeValues(Ops, DL);
+}
+
+SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+
+ // DL(G) uses a double-width dividend, so we need to clear the even
+ // register in the GR128 input. The instruction returns the remainder
+ // in the even register and the quotient in the odd register.
+ SDValue Ops[2];
+ if (is32Bit(VT))
+ lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_32, SystemZISD::UDIVREM32,
+ Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
+ else
+ lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_64, SystemZISD::UDIVREM64,
+ Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
+ return DAG.getMergeValues(Ops, DL);
+}
+
+SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
+
+ // Get the known-zero masks for each operand.
+ SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1) };
+ KnownBits Known[2];
+ DAG.computeKnownBits(Ops[0], Known[0]);
+ DAG.computeKnownBits(Ops[1], Known[1]);
+
+ // See if the upper 32 bits of one operand and the lower 32 bits of the
+ // other are known zero. They are the low and high operands respectively.
+ uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
+ Known[1].Zero.getZExtValue() };
+ unsigned High, Low;
+ if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
+ High = 1, Low = 0;
+ else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
+ High = 0, Low = 1;
+ else
+ return Op;
+
+ SDValue LowOp = Ops[Low];
+ SDValue HighOp = Ops[High];
+
+ // If the high part is a constant, we're better off using IILH.
+ if (HighOp.getOpcode() == ISD::Constant)
+ return Op;
+
+ // If the low part is a constant that is outside the range of LHI,
+ // then we're better off using IILF.
+ if (LowOp.getOpcode() == ISD::Constant) {
+ int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue());
+ if (!isInt<16>(Value))
+ return Op;
+ }
+
+ // Check whether the high part is an AND that doesn't change the
+ // high 32 bits and just masks out low bits. We can skip it if so.
+ if (HighOp.getOpcode() == ISD::AND &&
+ HighOp.getOperand(1).getOpcode() == ISD::Constant) {
+ SDValue HighOp0 = HighOp.getOperand(0);
+ uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue();
+ if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
+ HighOp = HighOp0;
+ }
+
+ // Take advantage of the fact that all GR32 operations only change the
+ // low 32 bits by truncating Low to an i32 and inserting it directly
+ // using a subreg. The interesting cases are those where the truncation
+ // can be folded.
+ SDLoc DL(Op);
+ SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
+ return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
+ MVT::i64, HighOp, Low32);
+}
+
+SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+ Op = Op.getOperand(0);
+
+ // Handle vector types via VPOPCT.
+ if (VT.isVector()) {
+ Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
+ Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
+ switch (VT.getScalarSizeInBits()) {
+ case 8:
+ break;
+ case 16: {
+ Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
+ SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
+ SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
+ Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
+ Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
+ break;
+ }
+ case 32: {
+ SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
+ DAG.getConstant(0, DL, MVT::i32));
+ Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
+ break;
+ }
+ case 64: {
+ SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
+ DAG.getConstant(0, DL, MVT::i32));
+ Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
+ Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
+ break;
+ }
+ default:
+ llvm_unreachable("Unexpected type");
+ }
+ return Op;
+ }
+
+ // Get the known-zero mask for the operand.
+ KnownBits Known;
+ DAG.computeKnownBits(Op, Known);
+ unsigned NumSignificantBits = (~Known.Zero).getActiveBits();
+ if (NumSignificantBits == 0)
+ return DAG.getConstant(0, DL, VT);
+
+ // Skip known-zero high parts of the operand.
+ int64_t OrigBitSize = VT.getSizeInBits();
+ int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits);
+ BitSize = std::min(BitSize, OrigBitSize);
+
+ // The POPCNT instruction counts the number of bits in each byte.
+ Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
+ Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
+ Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
+
+ // Add up per-byte counts in a binary tree. All bits of Op at
+ // position larger than BitSize remain zero throughout.
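+ // For a full i64 (illustrative): after POPCNT each byte holds its own
+ // count; adding Op << 32, then Op << 16, then Op << 8 accumulates all
+ // eight byte counts into the top byte, which the final SRL extracts.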
+ for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
+ SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
+ if (BitSize != OrigBitSize)
+ Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
+ DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
+ Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
+ }
+
+ // Extract overall result from high byte.
+ if (BitSize > 8)
+ Op = DAG.getNode(ISD::SRL, DL, VT, Op,
+ DAG.getConstant(BitSize - 8, DL, VT));
+
+ return Op;
+}
+
+SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
+ cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
+ SynchronizationScope FenceScope = static_cast<SynchronizationScope>(
+ cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
+
+ // The only fence that needs an instruction is a sequentially-consistent
+ // cross-thread fence.
+ if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
+ FenceScope == CrossThread) {
+ return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
+ Op.getOperand(0)),
+ 0);
+ }
+
+ // MEMBARRIER is a compiler barrier; it codegens to a no-op.
+ return DAG.getNode(SystemZISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
+}
+
+// Op is an atomic load. Lower it into a serialization followed
+// by a normal volatile load.
+SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
+ SelectionDAG &DAG) const {
+ auto *Node = cast<AtomicSDNode>(Op.getNode());
+ SDValue Chain = SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op),
+ MVT::Other, Node->getChain()), 0);
+ return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(),
+ Chain, Node->getBasePtr(),
+ Node->getMemoryVT(), Node->getMemOperand());
+}
+
+// Op is an atomic store. Lower it into a normal volatile store followed
+// by a serialization.
+SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
+ SelectionDAG &DAG) const {
+ auto *Node = cast<AtomicSDNode>(Op.getNode());
+ SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(),
+ Node->getBasePtr(), Node->getMemoryVT(),
+ Node->getMemOperand());
+ return SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op), MVT::Other,
+ Chain), 0);
+}
+
+// Op is an 8-, 16- or 32-bit ATOMIC_LOAD_* operation. Lower the first
+// two into the fullword ATOMIC_LOADW_* operation given by Opcode.
+SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
+ SelectionDAG &DAG,
+ unsigned Opcode) const {
+ auto *Node = cast<AtomicSDNode>(Op.getNode());
+
+ // 32-bit operations need no code outside the main loop.
+ EVT NarrowVT = Node->getMemoryVT();
+ EVT WideVT = MVT::i32;
+ if (NarrowVT == WideVT)
+ return Op;
+
+ int64_t BitSize = NarrowVT.getSizeInBits();
+ SDValue ChainIn = Node->getChain();
+ SDValue Addr = Node->getBasePtr();
+ SDValue Src2 = Node->getVal();
+ MachineMemOperand *MMO = Node->getMemOperand();
+ SDLoc DL(Node);
+ EVT PtrVT = Addr.getValueType();
+
+ // Convert atomic subtracts of constants into additions.
+ if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
+ if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
+ Opcode = SystemZISD::ATOMIC_LOADW_ADD;
+ Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
+ }
+
+ // Get the address of the containing word.
+ SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
+ DAG.getConstant(-4, DL, PtrVT));
+
+ // Get the number of bits that the word must be rotated left in order
+ // to bring the field to the top bits of a GR32.
+ SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
+ DAG.getConstant(3, DL, PtrVT));
+ BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
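+ // For example, a big-endian halfword at byte offset 2 within its word
+ // gives BitShift == 16; rotating the containing word left by 16 bits
+ // brings bytes 2 and 3 into the top 16 bits of the GR32.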
+
+ // Get the complementing shift amount, for rotating a field in the top
+ // bits back to its proper position.
+ SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
+ DAG.getConstant(0, DL, WideVT), BitShift);
+
+ // Extend the source operand to 32 bits and prepare it for the inner loop.
+ // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
+ // operations require the source to be shifted in advance. (This shift
+ // can be folded if the source is constant.) For AND and NAND, the lower
+ // bits must be set, while for other opcodes they should be left clear.
+ if (Opcode != SystemZISD::ATOMIC_SWAPW)
+ Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
+ DAG.getConstant(32 - BitSize, DL, WideVT));
+ if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
+ Opcode == SystemZISD::ATOMIC_LOADW_NAND)
+ Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
+ DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
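+ // For an 8-bit AND, for example, this ORs in uint32_t(-1) >> 8 ==
+ // 0x00ffffff, so that the 24 bits outside the field survive the AND
+ // inside the CS loop.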
+
+ // Construct the ATOMIC_LOADW_* node.
+ SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
+ SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
+ DAG.getConstant(BitSize, DL, WideVT) };
+ SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
+ NarrowVT, MMO);
+
+ // Rotate the result of the final CS so that the field is in the lower
+ // bits of a GR32, then truncate it.
+ SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
+ DAG.getConstant(BitSize, DL, WideVT));
+ SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
+
+ SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
+ return DAG.getMergeValues(RetOps, DL);
+}
+
+// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations
+// into ATOMIC_LOADW_SUBs and decide whether to convert 32- and 64-bit
+// operations into additions.
+SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
+ SelectionDAG &DAG) const {
+ auto *Node = cast<AtomicSDNode>(Op.getNode());
+ EVT MemVT = Node->getMemoryVT();
+ if (MemVT == MVT::i32 || MemVT == MVT::i64) {
+ // A full-width operation.
+ assert(Op.getValueType() == MemVT && "Mismatched VTs");
+ SDValue Src2 = Node->getVal();
+ SDValue NegSrc2;
+ SDLoc DL(Src2);
+
+ if (auto *Op2 = dyn_cast<ConstantSDNode>(Src2)) {
+ // Use an addition if the operand is constant and either LAA(G) is
+ // available or the negative value is in the range of A(G)FHI.
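+ // (An atomic subtraction of 1, for example, becomes an atomic
+ // addition of -1.)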
+ int64_t Value = (-Op2->getAPIntValue()).getSExtValue();
+ if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1())
+ NegSrc2 = DAG.getConstant(Value, DL, MemVT);
+ } else if (Subtarget.hasInterlockedAccess1())
+ // Use LAA(G) if available.
+ NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT),
+ Src2);
+
+ if (NegSrc2.getNode())
+ return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
+ Node->getChain(), Node->getBasePtr(), NegSrc2,
+ Node->getMemOperand());
+
+ // Use the node as-is.
+ return Op;
+ }
+
+ return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
+}
+
+// Op is an 8-bit, 16-bit or 32-bit ATOMIC_CMP_SWAP operation. Lower the
+// 8- and 16-bit cases into a fullword ATOMIC_CMP_SWAPW operation; 32-bit
+// compare and swap has native support.
+SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
+ SelectionDAG &DAG) const {
+ auto *Node = cast<AtomicSDNode>(Op.getNode());
+
+ // We have native support for 32-bit compare and swap.
+ EVT NarrowVT = Node->getMemoryVT();
+ EVT WideVT = MVT::i32;
+ if (NarrowVT == WideVT)
+ return Op;
+
+ int64_t BitSize = NarrowVT.getSizeInBits();
+ SDValue ChainIn = Node->getOperand(0);
+ SDValue Addr = Node->getOperand(1);
+ SDValue CmpVal = Node->getOperand(2);
+ SDValue SwapVal = Node->getOperand(3);
+ MachineMemOperand *MMO = Node->getMemOperand();
+ SDLoc DL(Node);
+ EVT PtrVT = Addr.getValueType();
+
+ // Get the address of the containing word.
+ SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
+ DAG.getConstant(-4, DL, PtrVT));
+
+ // Get the number of bits that the word must be rotated left in order
+ // to bring the field to the top bits of a GR32.
+ SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
+ DAG.getConstant(3, DL, PtrVT));
+ BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
+
+ // Get the complementing shift amount, for rotating a field in the top
+ // bits back to its proper position.
+ SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
+ DAG.getConstant(0, DL, WideVT), BitShift);
+
+ // Construct the ATOMIC_CMP_SWAPW node.
+ SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
+ SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
+ NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
+ SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
+ VTList, Ops, NarrowVT, MMO);
+ return AtomicOp;
+}
+
+SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
+ return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
+ SystemZ::R15D, Op.getValueType());
+}
+
+SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
+ bool StoreBackchain = MF.getFunction()->hasFnAttribute("backchain");
+
+ SDValue Chain = Op.getOperand(0);
+ SDValue NewSP = Op.getOperand(1);
+ SDValue Backchain;
+ SDLoc DL(Op);
+
+ if (StoreBackchain) {
+ SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, MVT::i64);
+ Backchain = DAG.getLoad(MVT::i64, DL, Chain, OldSP, MachinePointerInfo());
+ }
+
+ Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R15D, NewSP);
+
+ if (StoreBackchain)
+ Chain = DAG.getStore(Chain, DL, Backchain, NewSP, MachinePointerInfo());
+
+ return Chain;
+}
+
+SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
+ SelectionDAG &DAG) const {
+ bool IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+ if (!IsData)
+ // Just preserve the chain.
+ return Op.getOperand(0);
+
+ SDLoc DL(Op);
+ bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
+ auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
+ SDValue Ops[] = {
+ Op.getOperand(0),
+ DAG.getConstant(Code, DL, MVT::i32),
+ Op.getOperand(1)
+ };
+ return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
+ Node->getVTList(), Ops,
+ Node->getMemoryVT(), Node->getMemOperand());
+}
+
+// Return an i32 that contains the value of CC immediately after After,
+// whose final result value must be of type MVT::Glue.
+static SDValue getCCResult(SelectionDAG &DAG, SDNode *After) {
+ SDLoc DL(After);
+ SDValue Glue = SDValue(After, After->getNumValues() - 1);
+ SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
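+ // IPM places CC in bits 29:28 of the 32-bit result (below two zero
+ // bits), so shifting right by SystemZ::IPM_CC (28) leaves CC alone in
+ // the low two bits.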
+ return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
+ DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
+}
+
+SDValue
+SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ unsigned Opcode, CCValid;
+ if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
+ assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
+ SDValue Glued = emitIntrinsicWithChainAndGlue(DAG, Op, Opcode);
+ SDValue CC = getCCResult(DAG, Glued.getNode());
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
+ return SDValue();
+ }
+
+ return SDValue();
+}
+
+SDValue
+SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ unsigned Opcode, CCValid;
+ if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
+ SDValue Glued = emitIntrinsicWithGlue(DAG, Op, Opcode);
+ SDValue CC = getCCResult(DAG, Glued.getNode());
+ if (Op->getNumValues() == 1)
+ return CC;
+ assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
+ return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(), Glued,
+ CC);
+ }
+
+ unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ switch (Id) {
+ case Intrinsic::thread_pointer:
+ return lowerThreadPointer(SDLoc(Op), DAG);
+
+ case Intrinsic::s390_vpdi:
+ return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
+ case Intrinsic::s390_vperm:
+ return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
+ case Intrinsic::s390_vuphb:
+ case Intrinsic::s390_vuphh:
+ case Intrinsic::s390_vuphf:
+ return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1));
+
+ case Intrinsic::s390_vuplhb:
+ case Intrinsic::s390_vuplhh:
+ case Intrinsic::s390_vuplhf:
+ return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1));
+
+ case Intrinsic::s390_vuplb:
+ case Intrinsic::s390_vuplhw:
+ case Intrinsic::s390_vuplf:
+ return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1));
+
+ case Intrinsic::s390_vupllb:
+ case Intrinsic::s390_vupllh:
+ case Intrinsic::s390_vupllf:
+ return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1));
+
+ case Intrinsic::s390_vsumb:
+ case Intrinsic::s390_vsumh:
+ case Intrinsic::s390_vsumgh:
+ case Intrinsic::s390_vsumgf:
+ case Intrinsic::s390_vsumqf:
+ case Intrinsic::s390_vsumqg:
+ return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+
+ return SDValue();
+}
+
+namespace {
+// Indicates that SystemZISD operation Opcode can be used to perform the
+// equivalent of a VPERM with permute vector Bytes. If Opcode takes three
+// operands, Operand is the constant third operand, otherwise it is the
+// number of bytes in each element of the result.
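+// In Bytes, selectors 0-15 pick the corresponding byte of the first operand
+// and selectors 16-31 pick byte N-16 of the second operand; the VMRHG entry
+// below, for example, concatenates the high doubleword of each operand.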
+struct Permute {
+ unsigned Opcode;
+ unsigned Operand;
+ unsigned char Bytes[SystemZ::VectorBytes];
+};
+}
+
+static const Permute PermuteForms[] = {
+ // VMRHG
+ { SystemZISD::MERGE_HIGH, 8,
+ { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
+ // VMRHF
+ { SystemZISD::MERGE_HIGH, 4,
+ { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
+ // VMRHH
+ { SystemZISD::MERGE_HIGH, 2,
+ { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
+ // VMRHB
+ { SystemZISD::MERGE_HIGH, 1,
+ { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
+ // VMRLG
+ { SystemZISD::MERGE_LOW, 8,
+ { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
+ // VMRLF
+ { SystemZISD::MERGE_LOW, 4,
+ { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
+ // VMRLH
+ { SystemZISD::MERGE_LOW, 2,
+ { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
+ // VMRLB
+ { SystemZISD::MERGE_LOW, 1,
+ { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
+ // VPKG
+ { SystemZISD::PACK, 4,
+ { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
+ // VPKF
+ { SystemZISD::PACK, 2,
+ { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
+ // VPKH
+ { SystemZISD::PACK, 1,
+ { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
+ // VPDI V1, V2, 4 (low half of V1, high half of V2)
+ { SystemZISD::PERMUTE_DWORDS, 4,
+ { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
+ // VPDI V1, V2, 1 (high half of V1, low half of V2)
+ { SystemZISD::PERMUTE_DWORDS, 1,
+ { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
+};
+
+// Called after matching a vector shuffle against a particular pattern.
+// Both the original shuffle and the pattern have two vector operands.
+// OpNos[0] is the operand of the original shuffle that should be used for
+// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
+// OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
+// set OpNo0 and OpNo1 to the shuffle operands that should actually be used
+// for operands 0 and 1 of the pattern.
+static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
+ if (OpNos[0] < 0) {
+ if (OpNos[1] < 0)
+ return false;
+ OpNo0 = OpNo1 = OpNos[1];
+ } else if (OpNos[1] < 0) {
+ OpNo0 = OpNo1 = OpNos[0];
+ } else {
+ OpNo0 = OpNos[0];
+ OpNo1 = OpNos[1];
+ }
+ return true;
+}
+
+// Bytes is a VPERM-like permute vector, except that -1 is used for
+// undefined bytes. Return true if the VPERM can be implemented using P.
+// When returning true set OpNo0 to the VPERM operand that should be
+// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
+//
+// For example, if swapping the VPERM operands allows P to match, OpNo0
+// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
+// operand, but rewriting it to use two duplicated operands allows it to
+// match P, then OpNo0 and OpNo1 will be the same.
+static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
+ unsigned &OpNo0, unsigned &OpNo1) {
+ int OpNos[] = { -1, -1 };
+ for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
+ int Elt = Bytes[I];
+ if (Elt >= 0) {
+ // Make sure that the two permute vectors use the same suboperand
+ // byte number. Only the operand numbers (the high bits) are
+ // allowed to differ.
+ if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
+ return false;
+ int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
+ int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
+ // Make sure that the operand mappings are consistent with previous
+ // elements.
+ if (OpNos[ModelOpNo] == 1 - RealOpNo)
+ return false;
+ OpNos[ModelOpNo] = RealOpNo;
+ }
+ }
+ return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
+}
+
+// As above, but search for a matching permute.
+static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
+ unsigned &OpNo0, unsigned &OpNo1) {
+ for (auto &P : PermuteForms)
+ if (matchPermute(Bytes, P, OpNo0, OpNo1))
+ return &P;
+ return nullptr;
+}
+
+// Bytes is a VPERM-like permute vector, except that -1 is used for
+// undefined bytes. This permute is an operand of an outer permute.
+// See whether redistributing the -1 bytes gives a shuffle that can be
+// implemented using P. If so, set Transform to a VPERM-like permute vector
+// that, when applied to the result of P, gives the original permute in Bytes.
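+// For example, the mask { 0, 16, -1, ... } can be implemented by VMRHB
+// (which produces { 0, 16, 1, 17, ... }) followed by a permute that
+// selects bytes { 0, 1, -1, ... } of the VMRHB result.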
+static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
+ const Permute &P,
+ SmallVectorImpl<int> &Transform) {
+ unsigned To = 0;
+ for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
+ int Elt = Bytes[From];
+ if (Elt < 0)
+ // Byte number From of the result is undefined.
+ Transform[From] = -1;
+ else {
+ while (P.Bytes[To] != Elt) {
+ To += 1;
+ if (To == SystemZ::VectorBytes)
+ return false;
+ }
+ Transform[From] = To;
+ }
+ }
+ return true;
+}
+
+// As above, but search for a matching permute.
+static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
+ SmallVectorImpl<int> &Transform) {
+ for (auto &P : PermuteForms)
+ if (matchDoublePermute(Bytes, P, Transform))
+ return &P;
+ return nullptr;
+}
+
+// Convert the mask of the given VECTOR_SHUFFLE into a byte-level mask,
+// as if it had type vNi8.
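+// For a v4i32 shuffle with mask <0, 4, 1, 5>, for example, the result is
+// { 0,1,2,3, 16,17,18,19, 4,5,6,7, 20,21,22,23 }, which matchPermute
+// recognizes as VMRHF.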
+static void getVPermMask(ShuffleVectorSDNode *VSN,
+ SmallVectorImpl<int> &Bytes) {
+ EVT VT = VSN->getValueType(0);
+ unsigned NumElements = VT.getVectorNumElements();
+ unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
+ Bytes.resize(NumElements * BytesPerElement, -1);
+ for (unsigned I = 0; I < NumElements; ++I) {
+ int Index = VSN->getMaskElt(I);
+ if (Index >= 0)
+ for (unsigned J = 0; J < BytesPerElement; ++J)
+ Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
+ }
+}
+
+// Bytes is a VPERM-like permute vector, except that -1 is used for
+// undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
+// the result come from a contiguous sequence of bytes from one input.
+// Set Base to the selector for the first byte if so.
+static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
+ unsigned BytesPerElement, int &Base) {
+ Base = -1;
+ for (unsigned I = 0; I < BytesPerElement; ++I) {
+ if (Bytes[Start + I] >= 0) {
+ unsigned Elem = Bytes[Start + I];
+ if (Base < 0) {
+ Base = Elem - I;
+ // Make sure the bytes would come from one input operand.
+ if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
+ return false;
+ } else if (unsigned(Base) != Elem - I)
+ return false;
+ }
+ }
+ return true;
+}
+
+// Bytes is a VPERM-like permute vector, except that -1 is used for
+// undefined bytes. Return true if it can be performed using VSLDI.
+// When returning true, set StartIndex to the shift amount and OpNo0
+// and OpNo1 to the VPERM operands that should be used as the first
+// and second shift operand respectively.
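+// For example, the byte mask { 1, 2, ..., 15, 16 } matches a shift-left
+// double with StartIndex == 1, OpNo0 == 0 and OpNo1 == 1.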
+static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
+ unsigned &StartIndex, unsigned &OpNo0,
+ unsigned &OpNo1) {
+ int OpNos[] = { -1, -1 };
+ int Shift = -1;
+ for (unsigned I = 0; I < 16; ++I) {
+ int Index = Bytes[I];
+ if (Index >= 0) {
+ int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
+ int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
+ int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
+ if (Shift < 0)
+ Shift = ExpectedShift;
+ else if (Shift != ExpectedShift)
+ return false;
+ // Make sure that the operand mappings are consistent with previous
+ // elements.
+ if (OpNos[ModelOpNo] == 1 - RealOpNo)
+ return false;
+ OpNos[ModelOpNo] = RealOpNo;
+ }
+ }
+ StartIndex = Shift;
+ return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
+}
+
+// Create a node that performs P on operands Op0 and Op1, casting the
+// operands to the appropriate type. The type of the result is determined by P.
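+// For VPKG (PACK with Operand == 4), for example, both operands are cast
+// to v2i64 and the result has type v4i32.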
+static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
+ const Permute &P, SDValue Op0, SDValue Op1) {
+ // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
+ // elements of a PACK are twice as wide as the outputs.
+ unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
+ P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
+ P.Operand);
+ // Cast both operands to the appropriate type.
+ MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
+ SystemZ::VectorBytes / InBytes);
+ Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
+ Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
+ SDValue Op;
+ if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
+ SDValue Op2 = DAG.getConstant(P.Operand, DL, MVT::i32);
+ Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
+ } else if (P.Opcode == SystemZISD::PACK) {
+ MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
+ SystemZ::VectorBytes / P.Operand);
+ Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
+ } else {
+ Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
+ }
+ return Op;
+}
+
+// Bytes is a VPERM-like permute vector, except that -1 is used for
+// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
+// VSLDI or VPERM.
+static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
+ SDValue *Ops,
+ const SmallVectorImpl<int> &Bytes) {
+ for (unsigned I = 0; I < 2; ++I)
+ Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
+
+ // First see whether VSLDI can be used.
+ unsigned StartIndex, OpNo0, OpNo1;
+ if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
+ return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
+ Ops[OpNo1], DAG.getConstant(StartIndex, DL, MVT::i32));
+
+ // Fall back on VPERM. Construct an SDNode for the permute vector.
+ SDValue IndexNodes[SystemZ::VectorBytes];
+ for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
+ if (Bytes[I] >= 0)
+ IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
+ else
+ IndexNodes[I] = DAG.getUNDEF(MVT::i32);
+ SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
+ return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2);
+}
+
+namespace {
+// Describes a general N-operand vector shuffle.
+struct GeneralShuffle {
+ GeneralShuffle(EVT vt) : VT(vt) {}
+ void addUndef();
+ bool add(SDValue, unsigned);
+ SDValue getNode(SelectionDAG &, const SDLoc &);
+
+ // The operands of the shuffle.
+ SmallVector<SDValue, SystemZ::VectorBytes> Ops;
+
+ // Bytes[I] is -1 if byte I of the result is undefined. Otherwise the
+ // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
+ // Bytes[I] / SystemZ::VectorBytes.
+ SmallVector<int, SystemZ::VectorBytes> Bytes;
+
+ // The type of the shuffle result.
+ EVT VT;
+};
+}
+
+// Add an extra undefined element to the shuffle.
+void GeneralShuffle::addUndef() {
+ unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
+ for (unsigned I = 0; I < BytesPerElement; ++I)
+ Bytes.push_back(-1);
+}
+
+// Add an extra element to the shuffle, taking it from element Elem of Op.
+// A null Op indicates a vector input whose value will be calculated later;
+// there is at most one such input per shuffle and it always has the same
+// type as the result. Aborts and returns false if the source vector elements
+// of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
+// LLVM semantics they would be implicitly extended, but that case is rare
+// and is not optimized here.
+bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
+ unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
+
+ // The source vector can have wider elements than the result,
+ // either through an explicit TRUNCATE or because of type legalization.
+ // We want the least significant part.
+ EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
+ unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
+
+ // Return false if the source elements are smaller than their destination
+ // elements.
+ if (FromBytesPerElement < BytesPerElement)
+ return false;
+
+ unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
+ (FromBytesPerElement - BytesPerElement));
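+ // For example, extracting a 4-byte element at index 1 of a source with
+ // 8-byte elements gives Byte == 12, the least-significant half of
+ // doubleword 1 (SystemZ is big-endian).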
+
+ // Look through things like shuffles and bitcasts.
+ while (Op.getNode()) {
+ if (Op.getOpcode() == ISD::BITCAST)
+ Op = Op.getOperand(0);
+ else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
+ // See whether the bytes we need come from a contiguous part of one
+ // operand.
+ SmallVector<int, SystemZ::VectorBytes> OpBytes;
+ getVPermMask(cast<ShuffleVectorSDNode>(Op), OpBytes);
+ int NewByte;
+ if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
+ break;
+ if (NewByte < 0) {
+ addUndef();
+ return true;
+ }
+ Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
+ Byte = unsigned(NewByte) % SystemZ::VectorBytes;
+ } else if (Op.isUndef()) {
+ addUndef();
+ return true;
+ } else
+ break;
+ }
+
+ // Make sure that the source of the extraction is in Ops.
+ unsigned OpNo = 0;
+ for (; OpNo < Ops.size(); ++OpNo)
+ if (Ops[OpNo] == Op)
+ break;
+ if (OpNo == Ops.size())
+ Ops.push_back(Op);
+
+ // Add the element to Bytes.
+ unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
+ for (unsigned I = 0; I < BytesPerElement; ++I)
+ Bytes.push_back(Base + I);
+
+ return true;
+}
+
+// Return SDNodes for the completed shuffle.
+SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
+ assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
+
+ if (Ops.size() == 0)
+ return DAG.getUNDEF(VT);
+
+ // Make sure that there are at least two shuffle operands.
+ if (Ops.size() == 1)
+ Ops.push_back(DAG.getUNDEF(MVT::v16i8));
+
+ // Create a tree of shuffles, deferring root node until after the loop.
+ // Try to redistribute the undefined elements of non-root nodes so that
+ // the non-root shuffles match something like a pack or merge, then adjust
+ // the parent node's permute vector to compensate for the new order.
+ // Among other things, this copes with vectors like <2 x i16> that were
+ // padded with undefined elements during type legalization.
+ //
+ // In the best case this redistribution will lead to the whole tree
+ // using packs and merges. It should rarely be a loss in other cases.
+ unsigned Stride = 1;
+ for (; Stride * 2 < Ops.size(); Stride *= 2) {
+ for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
+ SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
+
+ // Create a mask for just these two operands.
+ SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
+ for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
+ unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
+ unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
+ if (OpNo == I)
+ NewBytes[J] = Byte;
+ else if (OpNo == I + Stride)
+ NewBytes[J] = SystemZ::VectorBytes + Byte;
+ else
+ NewBytes[J] = -1;
+ }
+ // See if it would be better to reorganize NewBytes to avoid using VPERM.
+ SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
+ if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
+ Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
+ // Applying NewBytesMap to Ops[I] gets back to NewBytes.
+ for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
+ if (NewBytes[J] >= 0) {
+ assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
+ "Invalid double permute");
+ Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
+ } else
+ assert(NewBytesMap[J] < 0 && "Invalid double permute");
+ }
+ } else {
+ // Just use NewBytes on the operands.
+ Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
+ for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
+ if (NewBytes[J] >= 0)
+ Bytes[J] = I * SystemZ::VectorBytes + J;
+ }
+ }
+ }
+
+ // Now we just have 2 inputs. Put the second operand in Ops[1].
+ if (Stride > 1) {
+ Ops[1] = Ops[Stride];
+ for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
+ if (Bytes[I] >= int(SystemZ::VectorBytes))
+ Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
+ }
+
+ // Look for an instruction that can do the permute without resorting
+ // to VPERM.
+ unsigned OpNo0, OpNo1;
+ SDValue Op;
+ if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
+ Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
+ else
+ Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
+ return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+}
+
+// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
+static bool isScalarToVector(SDValue Op) {
+ for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
+ if (!Op.getOperand(I).isUndef())
+ return false;
+ return true;
+}
+
+// Return a vector of type VT that contains Value in the first element.
+// The other elements don't matter.
+static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
+ SDValue Value) {
+ // If we have a constant, replicate it to all elements and let the
+ // BUILD_VECTOR lowering take care of it.
+ if (Value.getOpcode() == ISD::Constant ||
+ Value.getOpcode() == ISD::ConstantFP) {
+ SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
+ return DAG.getBuildVector(VT, DL, Ops);
+ }
+ if (Value.isUndef())
+ return DAG.getUNDEF(VT);
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
+}
+
+// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
+// element 1. Used for cases in which replication is cheap.
+static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
+ SDValue Op0, SDValue Op1) {
+ if (Op0.isUndef()) {
+ if (Op1.isUndef())
+ return DAG.getUNDEF(VT);
+ return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
+ }
+ if (Op1.isUndef())
+ return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
+ return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
+ buildScalarToVector(DAG, DL, VT, Op0),
+ buildScalarToVector(DAG, DL, VT, Op1));
+}
+
+// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
+// vector for them.
+static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
+ SDValue Op1) {
+ if (Op0.isUndef() && Op1.isUndef())
+ return DAG.getUNDEF(MVT::v2i64);
+ // If one of the two inputs is undefined then replicate the other one,
+ // in order to avoid using another register unnecessarily.
+ if (Op0.isUndef())
+ Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
+ else if (Op1.isUndef())
+ Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
+ else {
+ Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
+ Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
+ }
+ return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
+}
+
+// Try to represent constant BUILD_VECTOR node BVN using a
+// SystemZISD::BYTE_MASK-style mask. Store the mask value in Mask
+// on success.
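+// For example, the v4i32 constant < -1, 0, 0, -1 > produces Mask == 0xf00f.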
+static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) {
+ EVT ElemVT = BVN->getValueType(0).getVectorElementType();
+ unsigned BytesPerElement = ElemVT.getStoreSize();
+ for (unsigned I = 0, E = BVN->getNumOperands(); I != E; ++I) {
+ SDValue Op = BVN->getOperand(I);
+ if (!Op.isUndef()) {
+ uint64_t Value;
+ if (Op.getOpcode() == ISD::Constant)
+ Value = cast<ConstantSDNode>(Op)->getZExtValue();
+ else if (Op.getOpcode() == ISD::ConstantFP)
+ Value = (cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt()
+ .getZExtValue());
+ else
+ return false;
+ for (unsigned J = 0; J < BytesPerElement; ++J) {
+ uint64_t Byte = (Value >> (J * 8)) & 0xff;
+ if (Byte == 0xff)
+ Mask |= 1ULL << ((E - I - 1) * BytesPerElement + J);
+ else if (Byte != 0)
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+// Try to load a vector constant in which BitsPerElement-bit value Value
+// is replicated to fill the vector. VT is the type of the resulting
+// constant, which may have elements of a different size from BitsPerElement.
+// Return the SDValue of the constant on success, otherwise return
+// an empty value.
+static SDValue tryBuildVectorReplicate(SelectionDAG &DAG,
+ const SystemZInstrInfo *TII,
+ const SDLoc &DL, EVT VT, uint64_t Value,
+ unsigned BitsPerElement) {
+ // Signed 16-bit values can be replicated using VREPI.
+ int64_t SignedValue = SignExtend64(Value, BitsPerElement);
+ if (isInt<16>(SignedValue)) {
+ MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
+ SystemZ::VectorBits / BitsPerElement);
+ SDValue Op = DAG.getNode(SystemZISD::REPLICATE, DL, VecVT,
+ DAG.getConstant(SignedValue, DL, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+ }
+ // See whether rotating the constant left some N places gives a value that
+ // is one less than a power of 2 (i.e. all zeros followed by all ones).
+ // If so we can use VGM.
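+ // For example, with BitsPerElement == 32 the value 0x00ffff00 is such a
+ // rotated mask, giving Start == 8 and End == 23 after the adjustment
+ // below.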
+ unsigned Start, End;
+ if (TII->isRxSBGMask(Value, BitsPerElement, Start, End)) {
+ // isRxSBGMask returns the bit numbers for a full 64-bit value,
+ // with 0 denoting 1 << 63 and 63 denoting 1. Convert them to
+ // bit numbers for a BitsPerElement-bit value, so that 0 denotes
+ // 1 << (BitsPerElement-1).
+ Start -= 64 - BitsPerElement;
+ End -= 64 - BitsPerElement;
+ MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
+ SystemZ::VectorBits / BitsPerElement);
+ SDValue Op = DAG.getNode(SystemZISD::ROTATE_MASK, DL, VecVT,
+ DAG.getConstant(Start, DL, MVT::i32),
+ DAG.getConstant(End, DL, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+ }
+ return SDValue();
+}
+
+// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
+// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
+// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
+// would benefit from this representation and return it if so.
+static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
+ BuildVectorSDNode *BVN) {
+ EVT VT = BVN->getValueType(0);
+ unsigned NumElements = VT.getVectorNumElements();
+
+ // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
+ // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
+ // need a BUILD_VECTOR, add an additional placeholder operand for that
+ // BUILD_VECTOR and store its operands in ResidueOps.
+ GeneralShuffle GS(VT);
+ SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
+ bool FoundOne = false;
+ for (unsigned I = 0; I < NumElements; ++I) {
+ SDValue Op = BVN->getOperand(I);
+ if (Op.getOpcode() == ISD::TRUNCATE)
+ Op = Op.getOperand(0);
+ if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ Op.getOperand(1).getOpcode() == ISD::Constant) {
+ unsigned Elem = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ if (!GS.add(Op.getOperand(0), Elem))
+ return SDValue();
+ FoundOne = true;
+ } else if (Op.isUndef()) {
+ GS.addUndef();
+ } else {
+ if (!GS.add(SDValue(), ResidueOps.size()))
+ return SDValue();
+ ResidueOps.push_back(BVN->getOperand(I));
+ }
+ }
+
+ // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
+ if (!FoundOne)
+ return SDValue();
+
+ // Create the BUILD_VECTOR for the remaining elements, if any.
+ if (!ResidueOps.empty()) {
+ while (ResidueOps.size() < NumElements)
+ ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
+ for (auto &Op : GS.Ops) {
+ if (!Op.getNode()) {
+ Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
+ break;
+ }
+ }
+ }
+ return GS.getNode(DAG, SDLoc(BVN));
+}
+
+// Combine GPR scalar values Elems into a vector of type VT.
+static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
+ SmallVectorImpl<SDValue> &Elems) {
+ // See whether there is a single replicated value.
+ SDValue Single;
+ unsigned int NumElements = Elems.size();
+ unsigned int Count = 0;
+ for (auto Elem : Elems) {
+ if (!Elem.isUndef()) {
+ if (!Single.getNode())
+ Single = Elem;
+ else if (Elem != Single) {
+ Single = SDValue();
+ break;
+ }
+ Count += 1;
+ }
+ }
+ // There are three cases here:
+ //
+ // - if the only defined element is a loaded one, the best sequence
+ // is a replicating load.
+ //
+ // - otherwise, if the only defined element is an i64 value, we will
+ // end up with the same VLVGP sequence regardless of whether we short-cut
+ // for replication or fall through to the later code.
+ //
+ // - otherwise, if the only defined element is an i32 or smaller value,
+ // we would need 2 instructions to replicate it: VLVGP followed by VREPx.
+ // This is only a win if the single defined element is used more than once.
+ // In other cases we're better off using a single VLVGx.
+ if (Single.getNode() && (Count > 1 || Single.getOpcode() == ISD::LOAD))
+ return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
+
+ // If all elements are loads, use VLREP/VLEs (below).
+ bool AllLoads = true;
+ for (auto Elem : Elems)
+ if (Elem.getOpcode() != ISD::LOAD || cast<LoadSDNode>(Elem)->isIndexed()) {
+ AllLoads = false;
+ break;
+ }
+
+ // The best way of building a v2i64 from two i64s is to use VLVGP.
+ if (VT == MVT::v2i64 && !AllLoads)
+ return joinDwords(DAG, DL, Elems[0], Elems[1]);
+
+ // Use a 64-bit merge high to combine two doubles.
+ if (VT == MVT::v2f64 && !AllLoads)
+ return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
+
+ // Build v4f32 values directly from the FPRs:
+ //
+ // <Axxx> <Bxxx> <Cxxx> <Dxxx>
+ // V V VMRHF
+ // <ABxx> <CDxx>
+ // V VMRHG
+ // <ABCD>
+ if (VT == MVT::v4f32 && !AllLoads) {
+ SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
+ SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
+ // Avoid unnecessary undefs by reusing the other operand.
+ if (Op01.isUndef())
+ Op01 = Op23;
+ else if (Op23.isUndef())
+ Op23 = Op01;
+ // Merging identical replications is a no-op.
+ if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
+ return Op01;
+ Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
+ Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
+ SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
+ DL, MVT::v2i64, Op01, Op23);
+ return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+ }
+
+ // Collect the constant terms.
+ SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
+ SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false);
+
+ unsigned NumConstants = 0;
+ for (unsigned I = 0; I < NumElements; ++I) {
+ SDValue Elem = Elems[I];
+ if (Elem.getOpcode() == ISD::Constant ||
+ Elem.getOpcode() == ISD::ConstantFP) {
+ NumConstants += 1;
+ Constants[I] = Elem;
+ Done[I] = true;
+ }
+ }
+ // If there was at least one constant, fill in the other elements of
+ // Constants with undefs to get a full vector constant and use that
+ // as the starting point.
+ SDValue Result;
+ if (NumConstants > 0) {
+ for (unsigned I = 0; I < NumElements; ++I)
+ if (!Constants[I].getNode())
+ Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
+ Result = DAG.getBuildVector(VT, DL, Constants);
+ } else {
+ // Otherwise try to use VLREP or VLVGP to start the sequence in order to
+ // avoid a false dependency on any previous contents of the vector
+ // register.
+
+ // Use a VLREP if at least one element is a load.
+ unsigned LoadElIdx = UINT_MAX;
+ for (unsigned I = 0; I < NumElements; ++I)
+ if (Elems[I].getOpcode() == ISD::LOAD &&
+ cast<LoadSDNode>(Elems[I])->isUnindexed()) {
+ LoadElIdx = I;
+ break;
+ }
+ if (LoadElIdx != UINT_MAX) {
+ Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, Elems[LoadElIdx]);
+ Done[LoadElIdx] = true;
+ } else {
+ // Try to use VLVGP.
+ unsigned I1 = NumElements / 2 - 1;
+ unsigned I2 = NumElements - 1;
+ bool Def1 = !Elems[I1].isUndef();
+ bool Def2 = !Elems[I2].isUndef();
+ if (Def1 || Def2) {
+ SDValue Elem1 = Elems[Def1 ? I1 : I2];
+ SDValue Elem2 = Elems[Def2 ? I2 : I1];
+ Result = DAG.getNode(ISD::BITCAST, DL, VT,
+ joinDwords(DAG, DL, Elem1, Elem2));
+ Done[I1] = true;
+ Done[I2] = true;
+ } else
+ Result = DAG.getUNDEF(VT);
+ }
+ }
+
+ // Use VLVGx to insert the other elements.
+ for (unsigned I = 0; I < NumElements; ++I)
+ if (!Done[I] && !Elems[I].isUndef())
+ Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
+ DAG.getConstant(I, DL, MVT::i32));
+ return Result;
+}
+
+SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+ auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+
+ if (BVN->isConstant()) {
+ // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
+ // preferred way of creating all-zero and all-one vectors so give it
+ // priority over other methods below.
+ uint64_t Mask = 0;
+ if (tryBuildVectorByteMask(BVN, Mask)) {
+ SDValue Op = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
+ DAG.getConstant(Mask, DL, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+ }
+
+ // Try using some form of replication.
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
+ 8, true) &&
+ SplatBitSize <= 64) {
+ // First try assuming that any undefined bits above the highest set bit
+ // and below the lowest set bit are 1s. This increases the likelihood of
+ // being able to use a sign-extended element value in VECTOR REPLICATE
+ // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
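+ // For instance, a 16-bit splat of 0x0070 whose low four bits are
+ // undefined is first tried as 0x007f, which VECTOR REPLICATE IMMEDIATE
+ // can encode.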
+ uint64_t SplatBitsZ = SplatBits.getZExtValue();
+ uint64_t SplatUndefZ = SplatUndef.getZExtValue();
+ uint64_t Lower = (SplatUndefZ
+ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
+ uint64_t Upper = (SplatUndefZ
+ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
+ uint64_t Value = SplatBitsZ | Upper | Lower;
+ SDValue Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value,
+ SplatBitSize);
+ if (Op.getNode())
+ return Op;
+
+ // Now try assuming that any undefined bits between the first and
+ // last defined set bits are set. This increases the chances of
+ // using a non-wraparound mask.
+ uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
+ Value = SplatBitsZ | Middle;
+ Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value, SplatBitSize);
+ if (Op.getNode())
+ return Op;
+ }
+
+ // Fall back to loading it from memory.
+ return SDValue();
+ }
+
+ // See if we should use shuffles to construct the vector from other vectors.
+ if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
+ return Res;
+
+ // Detect SCALAR_TO_VECTOR conversions.
+ if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op))
+ return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
+
+ // Otherwise use buildVector to build the vector up from GPRs.
+ unsigned NumElements = Op.getNumOperands();
+ SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements);
+ for (unsigned I = 0; I < NumElements; ++I)
+ Ops[I] = Op.getOperand(I);
+ return buildVector(DAG, DL, VT, Ops);
+}
+
+SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
+ SelectionDAG &DAG) const {
+ auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ unsigned NumElements = VT.getVectorNumElements();
+
+ if (VSN->isSplat()) {
+ SDValue Op0 = Op.getOperand(0);
+ unsigned Index = VSN->getSplatIndex();
+ assert(Index < VT.getVectorNumElements() &&
+ "Splat index should be defined and in first operand");
+ // See whether the value we're splatting is directly available as a scalar.
+ if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
+ Op0.getOpcode() == ISD::BUILD_VECTOR)
+ return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
+ // Otherwise keep it as a vector-to-vector operation.
+ return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
+ DAG.getConstant(Index, DL, MVT::i32));
+ }
+
+ GeneralShuffle GS(VT);
+ for (unsigned I = 0; I < NumElements; ++I) {
+ int Elt = VSN->getMaskElt(I);
+ if (Elt < 0)
+ GS.addUndef();
+ else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
+ unsigned(Elt) % NumElements))
+ return SDValue();
+ }
+ return GS.getNode(DAG, SDLoc(VSN));
+}
+
+SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ // Just insert the scalar into element 0 of an undefined vector.
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
+ Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
+ Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
+}
+
+SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ // Handle insertions of floating-point values.
+ SDLoc DL(Op);
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op2 = Op.getOperand(2);
+ EVT VT = Op.getValueType();
+
+ // Insertions into constant indices of a v2f64 can be done using VPDI.
+ // However, if the inserted value is a bitcast or a constant then it's
+ // better to use GPRs, as below.
+ if (VT == MVT::v2f64 &&
+ Op1.getOpcode() != ISD::BITCAST &&
+ Op1.getOpcode() != ISD::ConstantFP &&
+ Op2.getOpcode() == ISD::Constant) {
+ uint64_t Index = cast<ConstantSDNode>(Op2)->getZExtValue();
+ unsigned Mask = VT.getVectorNumElements() - 1;
+ if (Index <= Mask)
+ return Op;
+ }
+
+ // Otherwise bitcast to the equivalent integer form and insert via a GPR.
+ MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
+ MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
+ SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
+ DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
+ DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
+ return DAG.getNode(ISD::BITCAST, DL, VT, Res);
+}
+
+SDValue
+SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ // Handle extractions of floating-point values.
+ SDLoc DL(Op);
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ EVT VT = Op.getValueType();
+ EVT VecVT = Op0.getValueType();
+
+ // Extractions of constant indices can be done directly.
+ if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
+ uint64_t Index = CIndexN->getZExtValue();
+ unsigned Mask = VecVT.getVectorNumElements() - 1;
+ if (Index <= Mask)
+ return Op;
+ }
+
+ // Otherwise bitcast to the equivalent integer form and extract via a GPR.
+ MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
+ MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
+ SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
+ DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
+ return DAG.getNode(ISD::BITCAST, DL, VT, Res);
+}
+
+SDValue
+SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
+ unsigned UnpackHigh) const {
+ SDValue PackedOp = Op.getOperand(0);
+ EVT OutVT = Op.getValueType();
+ EVT InVT = PackedOp.getValueType();
+ unsigned ToBits = OutVT.getScalarSizeInBits();
+ unsigned FromBits = InVT.getScalarSizeInBits();
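+ // Each iteration unpacks the high half and doubles the element size:
+ // extending v16i8 to v4i32, for example, emits two unpacks
+ // (i8 -> i16 -> i32).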
+ do {
+ FromBits *= 2;
+ EVT NewVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
+ SystemZ::VectorBits / FromBits);
+ PackedOp = DAG.getNode(UnpackHigh, SDLoc(PackedOp), NewVT, PackedOp);
+ } while (FromBits != ToBits);
+ return PackedOp;
+}
+
+SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
+ unsigned ByScalar) const {
+ // Look for cases where a vector shift can use the *_BY_SCALAR form.
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ unsigned ElemBitSize = VT.getScalarSizeInBits();
+
+ // See whether the shift vector is a splat represented as BUILD_VECTOR.
+ if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ // Check for constant splats. Use ElemBitSize as the minimum element
+ // width and reject splats that need wider elements.
+ if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
+ ElemBitSize, true) &&
+ SplatBitSize == ElemBitSize) {
+ SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
+ DL, MVT::i32);
+ return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
+ }
+ // Check for variable splats.
+ BitVector UndefElements;
+ SDValue Splat = BVN->getSplatValue(&UndefElements);
+ if (Splat) {
+ // Since i32 is the smallest legal type, we either need a no-op
+ // or a truncation.
+ SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
+ return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
+ }
+ }
+
+ // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
+ // and the shift amount is directly available in a GPR.
+ if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
+ if (VSN->isSplat()) {
+ SDValue VSNOp0 = VSN->getOperand(0);
+ unsigned Index = VSN->getSplatIndex();
+ assert(Index < VT.getVectorNumElements() &&
+ "Splat index should be defined and in first operand");
+ if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
+ VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
+ // Since i32 is the smallest legal type, we either need a no-op
+ // or a truncation.
+ SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
+ VSNOp0.getOperand(Index));
+ return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
+ }
+ }
+ }
+
+ // Otherwise just treat the current form as legal.
+ return Op;
+}
+
+SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ case ISD::FRAMEADDR:
+ return lowerFRAMEADDR(Op, DAG);
+ case ISD::RETURNADDR:
+ return lowerRETURNADDR(Op, DAG);
+ case ISD::BR_CC:
+ return lowerBR_CC(Op, DAG);
+ case ISD::SELECT_CC:
+ return lowerSELECT_CC(Op, DAG);
+ case ISD::SETCC:
+ return lowerSETCC(Op, DAG);
+ case ISD::GlobalAddress:
+ return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
+ case ISD::GlobalTLSAddress:
+ return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
+ case ISD::BlockAddress:
+ return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
+ case ISD::JumpTable:
+ return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
+ case ISD::ConstantPool:
+ return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
+ case ISD::BITCAST:
+ return lowerBITCAST(Op, DAG);
+ case ISD::VASTART:
+ return lowerVASTART(Op, DAG);
+ case ISD::VACOPY:
+ return lowerVACOPY(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC:
+ return lowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::GET_DYNAMIC_AREA_OFFSET:
+ return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
+ case ISD::SMUL_LOHI:
+ return lowerSMUL_LOHI(Op, DAG);
+ case ISD::UMUL_LOHI:
+ return lowerUMUL_LOHI(Op, DAG);
+ case ISD::SDIVREM:
+ return lowerSDIVREM(Op, DAG);
+ case ISD::UDIVREM:
+ return lowerUDIVREM(Op, DAG);
+ case ISD::OR:
+ return lowerOR(Op, DAG);
+ case ISD::CTPOP:
+ return lowerCTPOP(Op, DAG);
+ case ISD::ATOMIC_FENCE:
+ return lowerATOMIC_FENCE(Op, DAG);
+ case ISD::ATOMIC_SWAP:
+ return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
+ case ISD::ATOMIC_STORE:
+ return lowerATOMIC_STORE(Op, DAG);
+ case ISD::ATOMIC_LOAD:
+ return lowerATOMIC_LOAD(Op, DAG);
+ case ISD::ATOMIC_LOAD_ADD:
+ return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
+ case ISD::ATOMIC_LOAD_SUB:
+ return lowerATOMIC_LOAD_SUB(Op, DAG);
+ case ISD::ATOMIC_LOAD_AND:
+ return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
+ case ISD::ATOMIC_LOAD_OR:
+ return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
+ case ISD::ATOMIC_LOAD_XOR:
+ return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
+ case ISD::ATOMIC_LOAD_NAND:
+ return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
+ case ISD::ATOMIC_LOAD_MIN:
+ return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
+ case ISD::ATOMIC_LOAD_MAX:
+ return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
+ case ISD::ATOMIC_LOAD_UMIN:
+ return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
+ case ISD::ATOMIC_LOAD_UMAX:
+ return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
+ case ISD::ATOMIC_CMP_SWAP:
+ return lowerATOMIC_CMP_SWAP(Op, DAG);
+ case ISD::STACKSAVE:
+ return lowerSTACKSAVE(Op, DAG);
+ case ISD::STACKRESTORE:
+ return lowerSTACKRESTORE(Op, DAG);
+ case ISD::PREFETCH:
+ return lowerPREFETCH(Op, DAG);
+ case ISD::INTRINSIC_W_CHAIN:
+ return lowerINTRINSIC_W_CHAIN(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN:
+ return lowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::BUILD_VECTOR:
+ return lowerBUILD_VECTOR(Op, DAG);
+ case ISD::VECTOR_SHUFFLE:
+ return lowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::SCALAR_TO_VECTOR:
+ return lowerSCALAR_TO_VECTOR(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT:
+ return lowerINSERT_VECTOR_ELT(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT:
+ return lowerEXTRACT_VECTOR_ELT(Op, DAG);
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACK_HIGH);
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACKL_HIGH);
+ case ISD::SHL:
+ return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
+ case ISD::SRL:
+ return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
+ case ISD::SRA:
+ return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
+ default:
+ llvm_unreachable("Unexpected node to lower");
+ }
+}
+
+const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
+#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
+ switch ((SystemZISD::NodeType)Opcode) {
+ case SystemZISD::FIRST_NUMBER: break;
+ OPCODE(RET_FLAG);
+ OPCODE(CALL);
+ OPCODE(SIBCALL);
+ OPCODE(TLS_GDCALL);
+ OPCODE(TLS_LDCALL);
+ OPCODE(PCREL_WRAPPER);
+ OPCODE(PCREL_OFFSET);
+ OPCODE(IABS);
+ OPCODE(ICMP);
+ OPCODE(FCMP);
+ OPCODE(TM);
+ OPCODE(BR_CCMASK);
+ OPCODE(SELECT_CCMASK);
+ OPCODE(ADJDYNALLOC);
+ OPCODE(POPCNT);
+ OPCODE(UMUL_LOHI64);
+ OPCODE(SDIVREM32);
+ OPCODE(SDIVREM64);
+ OPCODE(UDIVREM32);
+ OPCODE(UDIVREM64);
+ OPCODE(MVC);
+ OPCODE(MVC_LOOP);
+ OPCODE(NC);
+ OPCODE(NC_LOOP);
+ OPCODE(OC);
+ OPCODE(OC_LOOP);
+ OPCODE(XC);
+ OPCODE(XC_LOOP);
+ OPCODE(CLC);
+ OPCODE(CLC_LOOP);
+ OPCODE(STPCPY);
+ OPCODE(STRCMP);
+ OPCODE(SEARCH_STRING);
+ OPCODE(IPM);
+ OPCODE(MEMBARRIER);
+ OPCODE(TBEGIN);
+ OPCODE(TBEGIN_NOFLOAT);
+ OPCODE(TEND);
+ OPCODE(BYTE_MASK);
+ OPCODE(ROTATE_MASK);
+ OPCODE(REPLICATE);
+ OPCODE(JOIN_DWORDS);
+ OPCODE(SPLAT);
+ OPCODE(MERGE_HIGH);
+ OPCODE(MERGE_LOW);
+ OPCODE(SHL_DOUBLE);
+ OPCODE(PERMUTE_DWORDS);
+ OPCODE(PERMUTE);
+ OPCODE(PACK);
+ OPCODE(PACKS_CC);
+ OPCODE(PACKLS_CC);
+ OPCODE(UNPACK_HIGH);
+ OPCODE(UNPACKL_HIGH);
+ OPCODE(UNPACK_LOW);
+ OPCODE(UNPACKL_LOW);
+ OPCODE(VSHL_BY_SCALAR);
+ OPCODE(VSRL_BY_SCALAR);
+ OPCODE(VSRA_BY_SCALAR);
+ OPCODE(VSUM);
+ OPCODE(VICMPE);
+ OPCODE(VICMPH);
+ OPCODE(VICMPHL);
+ OPCODE(VICMPES);
+ OPCODE(VICMPHS);
+ OPCODE(VICMPHLS);
+ OPCODE(VFCMPE);
+ OPCODE(VFCMPH);
+ OPCODE(VFCMPHE);
+ OPCODE(VFCMPES);
+ OPCODE(VFCMPHS);
+ OPCODE(VFCMPHES);
+ OPCODE(VFTCI);
+ OPCODE(VEXTEND);
+ OPCODE(VROUND);
+ OPCODE(VTM);
+ OPCODE(VFAE_CC);
+ OPCODE(VFAEZ_CC);
+ OPCODE(VFEE_CC);
+ OPCODE(VFEEZ_CC);
+ OPCODE(VFENE_CC);
+ OPCODE(VFENEZ_CC);
+ OPCODE(VISTR_CC);
+ OPCODE(VSTRC_CC);
+ OPCODE(VSTRCZ_CC);
+ OPCODE(TDC);
+ OPCODE(ATOMIC_SWAPW);
+ OPCODE(ATOMIC_LOADW_ADD);
+ OPCODE(ATOMIC_LOADW_SUB);
+ OPCODE(ATOMIC_LOADW_AND);
+ OPCODE(ATOMIC_LOADW_OR);
+ OPCODE(ATOMIC_LOADW_XOR);
+ OPCODE(ATOMIC_LOADW_NAND);
+ OPCODE(ATOMIC_LOADW_MIN);
+ OPCODE(ATOMIC_LOADW_MAX);
+ OPCODE(ATOMIC_LOADW_UMIN);
+ OPCODE(ATOMIC_LOADW_UMAX);
+ OPCODE(ATOMIC_CMP_SWAPW);
+ OPCODE(LRV);
+ OPCODE(STRV);
+ OPCODE(PREFETCH);
+ }
+ return nullptr;
+#undef OPCODE
+}
+
+// Return true if VT is a vector whose elements are a whole number of bytes
+// in width. Also check for presence of vector support.
+bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
+ if (!Subtarget.hasVector())
+ return false;
+
+ return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
+}
+
+// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
+// producing a result of type ResVT. Op is a possibly bitcast version
+// of the input vector and Index is the index (based on type VecVT) that
+// should be extracted. Return the new extraction if a simplification
+// was possible or if Force is true.
+SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
+ EVT VecVT, SDValue Op,
+ unsigned Index,
+ DAGCombinerInfo &DCI,
+ bool Force) const {
+ SelectionDAG &DAG = DCI.DAG;
+
+ // The number of bytes being extracted.
+ unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
+
+ for (;;) {
+ unsigned Opcode = Op.getOpcode();
+ if (Opcode == ISD::BITCAST)
+ // Look through bitcasts.
+ Op = Op.getOperand(0);
+ else if (Opcode == ISD::VECTOR_SHUFFLE &&
+ canTreatAsByteVector(Op.getValueType())) {
+ // Get a VPERM-like permute mask and see whether the bytes covered
+ // by the extracted element are a contiguous sequence from one
+ // source operand.
+ SmallVector<int, SystemZ::VectorBytes> Bytes;
+ getVPermMask(cast<ShuffleVectorSDNode>(Op), Bytes);
+ int First;
+ if (!getShuffleInput(Bytes, Index * BytesPerElement,
+ BytesPerElement, First))
+ break;
+ if (First < 0)
+ return DAG.getUNDEF(ResVT);
+ // Make sure the contiguous sequence starts at a multiple of the
+ // original element size.
+ unsigned Byte = unsigned(First) % Bytes.size();
+ if (Byte % BytesPerElement != 0)
+ break;
+ // We can get the extracted value directly from an input.
+ Index = Byte / BytesPerElement;
+ Op = Op.getOperand(unsigned(First) / Bytes.size());
+ Force = true;
+ } else if (Opcode == ISD::BUILD_VECTOR &&
+ canTreatAsByteVector(Op.getValueType())) {
+ // We can only optimize this case if the BUILD_VECTOR elements are
+ // at least as wide as the extracted value.
+ EVT OpVT = Op.getValueType();
+ unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
+ if (OpBytesPerElement < BytesPerElement)
+ break;
+ // Make sure that the least-significant bit of the extracted value
+ // is the least significant bit of an input.
+ unsigned End = (Index + 1) * BytesPerElement;
+ if (End % OpBytesPerElement != 0)
+ break;
+ // We're extracting the low part of one operand of the BUILD_VECTOR.
+ Op = Op.getOperand(End / OpBytesPerElement - 1);
+ if (!Op.getValueType().isInteger()) {
+ EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
+ Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
+ DCI.AddToWorklist(Op.getNode());
+ }
+ EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
+ Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
+ if (VT != ResVT) {
+ DCI.AddToWorklist(Op.getNode());
+ Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
+ }
+ return Op;
+ } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
+ Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
+ Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
+ canTreatAsByteVector(Op.getValueType()) &&
+ canTreatAsByteVector(Op.getOperand(0).getValueType())) {
+ // Make sure that only the unextended bits are significant.
+ EVT ExtVT = Op.getValueType();
+ EVT OpVT = Op.getOperand(0).getValueType();
+ unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
+ unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
+ unsigned Byte = Index * BytesPerElement;
+ unsigned SubByte = Byte % ExtBytesPerElement;
+ unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
+ if (SubByte < MinSubByte ||
+ SubByte + BytesPerElement > ExtBytesPerElement)
+ break;
+ // Get the byte offset of the unextended element...
+ Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
+ // ...then add the byte offset relative to that element.
+ Byte += SubByte - MinSubByte;
+ if (Byte % BytesPerElement != 0)
+ break;
+ Op = Op.getOperand(0);
+ Index = Byte / BytesPerElement;
+ Force = true;
+ } else
+ break;
+ }
+ if (Force) {
+ if (Op.getValueType() != VecVT) {
+ Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
+ DCI.AddToWorklist(Op.getNode());
+ }
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
+ DAG.getConstant(Index, DL, MVT::i32));
+ }
+ return SDValue();
+}
+
+// Optimize vector operations in scalar value Op on the basis that Op
+// is truncated to TruncVT.
+SDValue SystemZTargetLowering::combineTruncateExtract(
+ const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
+ // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
+ // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
+ // of type TruncVT.
+ if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ TruncVT.getSizeInBits() % 8 == 0) {
+ SDValue Vec = Op.getOperand(0);
+ EVT VecVT = Vec.getValueType();
+ if (canTreatAsByteVector(VecVT)) {
+ if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
+ unsigned TruncBytes = TruncVT.getStoreSize();
+ if (BytesPerElement % TruncBytes == 0) {
+ // Calculate the value of Y' in the above description. We are
+ // splitting the original elements into Scale equal-sized pieces
+ // and for truncation purposes want the last (least-significant)
+ // of these pieces for IndexN. This is easiest to do by calculating
+ // the start index of the following element and then subtracting 1.
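+ // For example (illustrative values): truncating element 1 of a v2i64
+ // vector to i32 gives Scale = 8 / 4 = 2 and NewIndex = (1 + 1) * 2 - 1
+ // = 3, i.e. the last i32 piece of that element in the v4i32 view, which
+ // on this big-endian target holds the least-significant bits.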
+ unsigned Scale = BytesPerElement / TruncBytes;
+ unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
+
+ // Defer the creation of the bitcast from X to combineExtract,
+ // which might be able to optimize the extraction.
+ VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8),
+ VecVT.getStoreSize() / TruncBytes);
+ EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
+ return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
+ }
+ }
+ }
+ }
+ return SDValue();
+}
+
+SDValue SystemZTargetLowering::combineSIGN_EXTEND(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+ // Convert (sext (ashr (shl X, C1), C2)) to
+ // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
+ // cheap as narrower ones.
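+ // For example (illustrative shift amounts): sign-extending the i32 value
+ // (ashr (shl X, 24), 24) to i64 gives Extra = 64 - 32 = 32 below, so the
+ // result is (ashr (shl (anyext X), 56), 56) on the wider type.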
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
+ auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ SDValue Inner = N0.getOperand(0);
+ if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
+ if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
+ unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
+ unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
+ unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
+ EVT ShiftVT = N0.getOperand(1).getValueType();
+ SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
+ Inner.getOperand(0));
+ SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
+ DAG.getConstant(NewShlAmt, SDLoc(Inner),
+ ShiftVT));
+ return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
+ DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
+ }
+ }
+ }
+ return SDValue();
+}
+
+SDValue SystemZTargetLowering::combineMERGE(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ unsigned Opcode = N->getOpcode();
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ if (Op0.getOpcode() == ISD::BITCAST)
+ Op0 = Op0.getOperand(0);
+ if (Op0.getOpcode() == SystemZISD::BYTE_MASK &&
+ cast<ConstantSDNode>(Op0.getOperand(0))->getZExtValue() == 0) {
+ // (z_merge_* 0, 0) -> 0. This is mostly useful for enabling the use
+ // of VLLEZF for v4f32.
+ if (Op1 == N->getOperand(0))
+ return Op1;
+ // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
+ EVT VT = Op1.getValueType();
+ unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
+ if (ElemBytes <= 4) {
+ Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
+ SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
+ EVT InVT = VT.changeVectorElementTypeToInteger();
+ EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
+ SystemZ::VectorBytes / ElemBytes / 2);
+ if (VT != InVT) {
+ Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
+ DCI.AddToWorklist(Op1.getNode());
+ }
+ SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
+ DCI.AddToWorklist(Op.getNode());
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
+ }
+ }
+ return SDValue();
+}
+
+SDValue SystemZTargetLowering::combineSTORE(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ auto *SN = cast<StoreSDNode>(N);
+ auto &Op1 = N->getOperand(1);
+ EVT MemVT = SN->getMemoryVT();
+ // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
+ // for the extraction to be done on a vMiN value, so that we can use VSTE.
+ // If X has wider elements, then convert it to:
+ // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
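+ // For example (illustrative types): a truncstorei16 of element 1 of a
+ // v4i32 vector becomes a store of element 3 of the v8i16 view of the
+ // same vector, which VSTEH can handle directly.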
+ if (MemVT.isInteger()) {
+ if (SDValue Value =
+ combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
+ DCI.AddToWorklist(Value.getNode());
+
+ // Rewrite the store with the new form of stored value.
+ return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
+ SN->getBasePtr(), SN->getMemoryVT(),
+ SN->getMemOperand());
+ }
+ }
+ // Combine STORE (BSWAP) into STRVH/STRV/STRVG
+ // See comment in combineBSWAP about volatile accesses.
+ if (!SN->isVolatile() &&
+ Op1.getOpcode() == ISD::BSWAP &&
+ Op1.getNode()->hasOneUse() &&
+ (Op1.getValueType() == MVT::i16 ||
+ Op1.getValueType() == MVT::i32 ||
+ Op1.getValueType() == MVT::i64)) {
+
+ SDValue BSwapOp = Op1.getOperand(0);
+
+ if (BSwapOp.getValueType() == MVT::i16)
+ BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
+
+ SDValue Ops[] = {
+ N->getOperand(0), BSwapOp, N->getOperand(2),
+ DAG.getValueType(Op1.getValueType())
+ };
+
+ return
+ DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
+ Ops, MemVT, SN->getMemOperand());
+ }
+ return SDValue();
+}
+
+SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+
+ if (!Subtarget.hasVector())
+ return SDValue();
+
+ // Try to simplify a vector extraction.
+ if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+ SDValue Op0 = N->getOperand(0);
+ EVT VecVT = Op0.getValueType();
+ return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
+ IndexN->getZExtValue(), DCI, false);
+ }
+ return SDValue();
+}
+
+SDValue SystemZTargetLowering::combineJOIN_DWORDS(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ // (join_dwords X, X) == (replicate X)
+ if (N->getOperand(0) == N->getOperand(1))
+ return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
+ N->getOperand(0));
+ return SDValue();
+}
+
+SDValue SystemZTargetLowering::combineFP_ROUND(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+ // (fpround (extract_vector_elt X 0))
+ // (fpround (extract_vector_elt X 1)) ->
+ // (extract_vector_elt (VROUND X) 0)
+ // (extract_vector_elt (VROUND X) 1)
+ //
+ // This is a special case since the target doesn't really support v2f32s.
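+ //
+ // VROUND on v2f64 places the two rounded f32 results in elements 0 and 2
+ // of the v4f32 result, which is why the extraction for the second source
+ // element below uses index 2.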
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Op0 = N->getOperand(0);
+ if (N->getValueType(0) == MVT::f32 &&
+ Op0.hasOneUse() &&
+ Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ Op0.getOperand(0).getValueType() == MVT::v2f64 &&
+ Op0.getOperand(1).getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
+ SDValue Vec = Op0.getOperand(0);
+ for (auto *U : Vec->uses()) {
+ if (U != Op0.getNode() &&
+ U->hasOneUse() &&
+ U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ U->getOperand(0) == Vec &&
+ U->getOperand(1).getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) {
+ SDValue OtherRound = SDValue(*U->use_begin(), 0);
+ if (OtherRound.getOpcode() == ISD::FP_ROUND &&
+ OtherRound.getOperand(0) == SDValue(U, 0) &&
+ OtherRound.getValueType() == MVT::f32) {
+ SDValue VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
+ MVT::v4f32, Vec);
+ DCI.AddToWorklist(VRound.getNode());
+ SDValue Extract1 =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
+ VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
+ DCI.AddToWorklist(Extract1.getNode());
+ DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
+ SDValue Extract0 =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
+ VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
+ return Extract0;
+ }
+ }
+ }
+ }
+ return SDValue();
+}
+
+SDValue SystemZTargetLowering::combineBSWAP(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ // Combine BSWAP (LOAD) into LRVH/LRV/LRVG
+ // These byte-swapping loads are allowed to access memory multiple times,
+ // so the combine must only be performed when the original load is not
+ // volatile.
+ if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
+ N->getOperand(0).hasOneUse() &&
+ (N->getValueType(0) == MVT::i16 || N->getValueType(0) == MVT::i32 ||
+ N->getValueType(0) == MVT::i64) &&
+ !cast<LoadSDNode>(N->getOperand(0))->isVolatile()) {
+ SDValue Load = N->getOperand(0);
+ LoadSDNode *LD = cast<LoadSDNode>(Load);
+
+ // Create the byte-swapping load.
+ SDValue Ops[] = {
+ LD->getChain(), // Chain
+ LD->getBasePtr(), // Ptr
+ DAG.getValueType(N->getValueType(0)) // VT
+ };
+ SDValue BSLoad =
+ DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
+ DAG.getVTList(N->getValueType(0) == MVT::i64 ?
+ MVT::i64 : MVT::i32, MVT::Other),
+ Ops, LD->getMemoryVT(), LD->getMemOperand());
+
+ // If this is an i16 load, insert the truncate.
+ SDValue ResVal = BSLoad;
+ if (N->getValueType(0) == MVT::i16)
+ ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);
+
+ // First, combine the bswap away. This makes the value produced by the
+ // load dead.
+ DCI.CombineTo(N, ResVal);
+
+ // Next, combine the load away; we give it a bogus result value but a real
+ // chain result. The result value is dead because the bswap is dead.
+ DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
+
+ // Return N so it doesn't get rechecked!
+ return SDValue(N, 0);
+ }
+ return SDValue();
+}
+
+SDValue SystemZTargetLowering::combineSHIFTROT(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+
+ SelectionDAG &DAG = DCI.DAG;
+
+ // Shift/rotate instructions only use the low 6 bits of the second operand
+ // register. If the second operand is the result of an AND with an immediate
+ // value that has its low 6 bits all set, we can safely remove the AND.
+ //
+ // If the AND mask doesn't have the low 6 bits all set, we can't remove the
+ // AND entirely, but we can still truncate the mask to a 16-bit value. This
+ // prevents us from ending up with a NILL that has a signed operand, which
+ // would cause the instruction printer to abort.
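+ //
+ // For example (illustrative masks): (shl X, (and Y, 0x3f)) becomes
+ // (shl X, Y) because the low 6 bits of the mask are all set, while a
+ // mask such as 0x1003e is truncated to 0x003e so that it fits NILL.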
+ SDValue N1 = N->getOperand(1);
+ if (N1.getOpcode() == ISD::AND) {
+ SDValue AndMaskOp = N1->getOperand(1);
+ auto *AndMask = dyn_cast<ConstantSDNode>(AndMaskOp);
+
+ // The AND mask is constant
+ if (AndMask) {
+ auto AmtVal = AndMask->getZExtValue();
+
+ // Bottom 6 bits are set
+ if ((AmtVal & 0x3f) == 0x3f) {
+ SDValue AndOp = N1->getOperand(0);
+
+ // This is the only use, so remove the node
+ if (N1.hasOneUse()) {
+ // Combine the AND away
+ DCI.CombineTo(N1.getNode(), AndOp);
+
+ // Return N so it isn't rechecked
+ return SDValue(N, 0);
+
+ // The node will be reused, so create a new node for this one use
+ } else {
+ SDValue Replace = DAG.getNode(N->getOpcode(), SDLoc(N),
+ N->getValueType(0), N->getOperand(0),
+ AndOp);
+ DCI.AddToWorklist(Replace.getNode());
+
+ return Replace;
+ }
+
+ // We can't remove the AND, but we can use NILL here (normally we would
+ // use NILF). Only keep the last 16 bits of the mask. The actual
+ // transformation will be handled by .td definitions.
+ } else if (AmtVal >> 16 != 0) {
+ SDValue AndOp = N1->getOperand(0);
+
+ auto NewMask = DAG.getConstant(AndMask->getZExtValue() & 0x0000ffff,
+ SDLoc(AndMaskOp),
+ AndMaskOp.getValueType());
+
+ auto NewAnd = DAG.getNode(N1.getOpcode(), SDLoc(N1), N1.getValueType(),
+ AndOp, NewMask);
+
+ SDValue Replace = DAG.getNode(N->getOpcode(), SDLoc(N),
+ N->getValueType(0), N->getOperand(0),
+ NewAnd);
+ DCI.AddToWorklist(Replace.getNode());
+
+ return Replace;
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ switch(N->getOpcode()) {
+ default: break;
+ case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI);
+ case SystemZISD::MERGE_HIGH:
+ case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
+ case ISD::STORE: return combineSTORE(N, DCI);
+ case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
+ case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
+ case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
+ case ISD::BSWAP: return combineBSWAP(N, DCI);
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL: return combineSHIFTROT(N, DCI);
+ }
+
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Custom insertion
+//===----------------------------------------------------------------------===//
+
+// Create a new basic block after MBB.
+static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) {
+ MachineFunction &MF = *MBB->getParent();
+ MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
+ MF.insert(std::next(MachineFunction::iterator(MBB)), NewMBB);
+ return NewMBB;
+}
+
+// Split MBB after MI and return the new block (the one that contains
+// instructions after MI).
+static MachineBasicBlock *splitBlockAfter(MachineBasicBlock::iterator MI,
+ MachineBasicBlock *MBB) {
+ MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
+ NewMBB->splice(NewMBB->begin(), MBB,
+ std::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
+ return NewMBB;
+}
+
+// Split MBB before MI and return the new block (the one that contains MI).
+static MachineBasicBlock *splitBlockBefore(MachineBasicBlock::iterator MI,
+ MachineBasicBlock *MBB) {
+ MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
+ NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end());
+ NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
+ return NewMBB;
+}
+
+// Force base value Base into a register before MI. Return the register.
+static unsigned forceReg(MachineInstr &MI, MachineOperand &Base,
+ const SystemZInstrInfo *TII) {
+ if (Base.isReg())
+ return Base.getReg();
+
+ MachineBasicBlock *MBB = MI.getParent();
+ MachineFunction &MF = *MBB->getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+ BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
+ .add(Base)
+ .addImm(0)
+ .addReg(0);
+ return Reg;
+}
+
+// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
+MachineBasicBlock *
+SystemZTargetLowering::emitSelect(MachineInstr &MI,
+ MachineBasicBlock *MBB,
+ unsigned LOCROpcode) const {
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned TrueReg = MI.getOperand(1).getReg();
+ unsigned FalseReg = MI.getOperand(2).getReg();
+ unsigned CCValid = MI.getOperand(3).getImm();
+ unsigned CCMask = MI.getOperand(4).getImm();
+ DebugLoc DL = MI.getDebugLoc();
+
+ // Use LOCROpcode if possible.
+ if (LOCROpcode && Subtarget.hasLoadStoreOnCond()) {
+ BuildMI(*MBB, MI, DL, TII->get(LOCROpcode), DestReg)
+ .addReg(FalseReg).addReg(TrueReg)
+ .addImm(CCValid).addImm(CCMask);
+ MI.eraseFromParent();
+ return MBB;
+ }
+
+ MachineBasicBlock *StartMBB = MBB;
+ MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB);
+ MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
+
+ // StartMBB:
+ // BRC CCMask, JoinMBB
+ // # fallthrough to FalseMBB
+ MBB = StartMBB;
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
+ MBB->addSuccessor(JoinMBB);
+ MBB->addSuccessor(FalseMBB);
+
+ // FalseMBB:
+ // # fallthrough to JoinMBB
+ MBB = FalseMBB;
+ MBB->addSuccessor(JoinMBB);
+
+ // JoinMBB:
+ // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
+ // ...
+ MBB = JoinMBB;
+ BuildMI(*MBB, MI, DL, TII->get(SystemZ::PHI), DestReg)
+ .addReg(TrueReg).addMBB(StartMBB)
+ .addReg(FalseReg).addMBB(FalseMBB);
+
+ MI.eraseFromParent();
+ return JoinMBB;
+}
+
+// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
+// StoreOpcode is the store to use and Invert says whether the store should
+// happen when the condition is false rather than true. If a STORE ON
+// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
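+// For example, CondStore32 (see EmitInstrWithCustomInserter below) uses
+// StoreOpcode = ST with STOCOpcode = STOC, while the floating-point variants
+// such as CondStoreF32 pass STOCOpcode = 0 and always use the
+// branch-around-store sequence.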
+MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
+ MachineBasicBlock *MBB,
+ unsigned StoreOpcode,
+ unsigned STOCOpcode,
+ bool Invert) const {
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+
+ unsigned SrcReg = MI.getOperand(0).getReg();
+ MachineOperand Base = MI.getOperand(1);
+ int64_t Disp = MI.getOperand(2).getImm();
+ unsigned IndexReg = MI.getOperand(3).getReg();
+ unsigned CCValid = MI.getOperand(4).getImm();
+ unsigned CCMask = MI.getOperand(5).getImm();
+ DebugLoc DL = MI.getDebugLoc();
+
+ StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
+
+ // Use STOCOpcode if possible. We could use different store patterns in
+ // order to avoid matching the index register, but the performance trade-offs
+ // might be more complicated in that case.
+ if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
+ if (Invert)
+ CCMask ^= CCValid;
+
+ // ISel pattern matching also adds a load memory operand of the same
+ // address, so take special care to find the storing memory operand.
+ MachineMemOperand *MMO = nullptr;
+ for (auto *I : MI.memoperands())
+ if (I->isStore()) {
+ MMO = I;
+ break;
+ }
+
+ BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
+ .addReg(SrcReg)
+ .add(Base)
+ .addImm(Disp)
+ .addImm(CCValid)
+ .addImm(CCMask)
+ .addMemOperand(MMO);
+
+ MI.eraseFromParent();
+ return MBB;
+ }
+
+ // Get the condition needed to branch around the store.
+ if (!Invert)
+ CCMask ^= CCValid;
+
+ MachineBasicBlock *StartMBB = MBB;
+ MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB);
+ MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
+
+ // StartMBB:
+ // BRC CCMask, JoinMBB
+ // # fallthrough to FalseMBB
+ MBB = StartMBB;
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
+ MBB->addSuccessor(JoinMBB);
+ MBB->addSuccessor(FalseMBB);
+
+ // FalseMBB:
+ // store %SrcReg, %Disp(%Index,%Base)
+ // # fallthrough to JoinMBB
+ MBB = FalseMBB;
+ BuildMI(MBB, DL, TII->get(StoreOpcode))
+ .addReg(SrcReg)
+ .add(Base)
+ .addImm(Disp)
+ .addReg(IndexReg);
+ MBB->addSuccessor(JoinMBB);
+
+ MI.eraseFromParent();
+ return JoinMBB;
+}
+
+// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_LOAD{,W}_*
+// or ATOMIC_SWAP{,W} instruction MI. BinOpcode is the instruction that
+// performs the binary operation elided by "*", or 0 for ATOMIC_SWAP{,W}.
+// BitSize is the width of the field in bits, or 0 if this is a partword
+// ATOMIC_LOADW_* or ATOMIC_SWAPW instruction, in which case the bitsize
+// is one of the operands. Invert says whether the field should be
+// inverted after performing BinOpcode (e.g. for NAND).
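+// For example, ATOMIC_LOADW_NR (see EmitInstrWithCustomInserter below) is
+// expanded with BinOpcode = NR and BitSize = 0, producing a compare-and-swap
+// loop that ANDs the rotated subword field with Src2.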
+MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
+ MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
+ unsigned BitSize, bool Invert) const {
+ MachineFunction &MF = *MBB->getParent();
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ bool IsSubWord = (BitSize < 32);
+
+ // Extract the operands. Base can be a register or a frame index.
+ // Src2 can be a register or immediate.
+ unsigned Dest = MI.getOperand(0).getReg();
+ MachineOperand Base = earlyUseOperand(MI.getOperand(1));
+ int64_t Disp = MI.getOperand(2).getImm();
+ MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
+ unsigned BitShift = (IsSubWord ? MI.getOperand(4).getReg() : 0);
+ unsigned NegBitShift = (IsSubWord ? MI.getOperand(5).getReg() : 0);
+ DebugLoc DL = MI.getDebugLoc();
+ if (IsSubWord)
+ BitSize = MI.getOperand(6).getImm();
+
+ // Subword operations use 32-bit registers.
+ const TargetRegisterClass *RC = (BitSize <= 32 ?
+ &SystemZ::GR32BitRegClass :
+ &SystemZ::GR64BitRegClass);
+ unsigned LOpcode = BitSize <= 32 ? SystemZ::L : SystemZ::LG;
+ unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;
+
+ // Get the right opcodes for the displacement.
+ LOpcode = TII->getOpcodeForOffset(LOpcode, Disp);
+ CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
+ assert(LOpcode && CSOpcode && "Displacement out of range");
+
+ // Create virtual registers for temporary results.
+ unsigned OrigVal = MRI.createVirtualRegister(RC);
+ unsigned OldVal = MRI.createVirtualRegister(RC);
+ unsigned NewVal = (BinOpcode || IsSubWord ?
+ MRI.createVirtualRegister(RC) : Src2.getReg());
+ unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
+ unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);
+
+ // Insert a basic block for the main loop.
+ MachineBasicBlock *StartMBB = MBB;
+ MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
+ MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
+
+ // StartMBB:
+ // ...
+ // %OrigVal = L Disp(%Base)
+ // # fall through to LoopMBB
+ MBB = StartMBB;
+ BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
+ MBB->addSuccessor(LoopMBB);
+
+ // LoopMBB:
+ // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
+ // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
+ // %RotatedNewVal = OP %RotatedOldVal, %Src2
+ // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
+ // %Dest = CS %OldVal, %NewVal, Disp(%Base)
+ // JNE LoopMBB
+ // # fall through to DoneMBB
+ MBB = LoopMBB;
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
+ .addReg(OrigVal).addMBB(StartMBB)
+ .addReg(Dest).addMBB(LoopMBB);
+ if (IsSubWord)
+ BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
+ .addReg(OldVal).addReg(BitShift).addImm(0);
+ if (Invert) {
+ // Perform the operation normally and then invert every bit of the field.
+ unsigned Tmp = MRI.createVirtualRegister(RC);
+ BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
+ if (BitSize <= 32)
+ // XILF with the upper BitSize bits set.
+ BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
+ .addReg(Tmp).addImm(-1U << (32 - BitSize));
+ else {
+ // Use LCGR and add -1 to the result, which is more compact than
+ // an XILF, XILH pair.
+ unsigned Tmp2 = MRI.createVirtualRegister(RC);
+ BuildMI(MBB, DL, TII->get(SystemZ::LCGR), Tmp2).addReg(Tmp);
+ BuildMI(MBB, DL, TII->get(SystemZ::AGHI), RotatedNewVal)
+ .addReg(Tmp2).addImm(-1);
+ }
+ } else if (BinOpcode)
+ // A simple binary operation.
+ BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
+ .addReg(RotatedOldVal)
+ .add(Src2);
+ else if (IsSubWord)
+ // Use RISBG to rotate Src2 into position and use it to replace the
+ // field in RotatedOldVal.
+ BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
+ .addReg(RotatedOldVal).addReg(Src2.getReg())
+ .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
+ if (IsSubWord)
+ BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
+ .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
+ BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
+ .addReg(OldVal)
+ .addReg(NewVal)
+ .add(Base)
+ .addImm(Disp);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
+ MBB->addSuccessor(LoopMBB);
+ MBB->addSuccessor(DoneMBB);
+
+ MI.eraseFromParent();
+ return DoneMBB;
+}
+
+// Implement EmitInstrWithCustomInserter for pseudo
+// ATOMIC_LOAD{,W}_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
+// instruction that should be used to compare the current field with the
+// minimum or maximum value. KeepOldMask is the BRC condition-code mask
+// for when the current field should be kept. BitSize is the width of
+// the field in bits, or 0 if this is a partword ATOMIC_LOADW_* instruction.
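+// For example, ATOMIC_LOAD_MIN_32 (see EmitInstrWithCustomInserter below)
+// passes CompareOpcode = CR and KeepOldMask = CCMASK_CMP_LE: the loaded
+// value is kept when it compares less than or equal to Src2.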
+MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
+ MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
+ unsigned KeepOldMask, unsigned BitSize) const {
+ MachineFunction &MF = *MBB->getParent();
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ bool IsSubWord = (BitSize < 32);
+
+ // Extract the operands. Base can be a register or a frame index.
+ unsigned Dest = MI.getOperand(0).getReg();
+ MachineOperand Base = earlyUseOperand(MI.getOperand(1));
+ int64_t Disp = MI.getOperand(2).getImm();
+ unsigned Src2 = MI.getOperand(3).getReg();
+ unsigned BitShift = (IsSubWord ? MI.getOperand(4).getReg() : 0);
+ unsigned NegBitShift = (IsSubWord ? MI.getOperand(5).getReg() : 0);
+ DebugLoc DL = MI.getDebugLoc();
+ if (IsSubWord)
+ BitSize = MI.getOperand(6).getImm();
+
+ // Subword operations use 32-bit registers.
+ const TargetRegisterClass *RC = (BitSize <= 32 ?
+ &SystemZ::GR32BitRegClass :
+ &SystemZ::GR64BitRegClass);
+ unsigned LOpcode = BitSize <= 32 ? SystemZ::L : SystemZ::LG;
+ unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;
+
+ // Get the right opcodes for the displacement.
+ LOpcode = TII->getOpcodeForOffset(LOpcode, Disp);
+ CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
+ assert(LOpcode && CSOpcode && "Displacement out of range");
+
+ // Create virtual registers for temporary results.
+ unsigned OrigVal = MRI.createVirtualRegister(RC);
+ unsigned OldVal = MRI.createVirtualRegister(RC);
+ unsigned NewVal = MRI.createVirtualRegister(RC);
+ unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
+ unsigned RotatedAltVal = (IsSubWord ? MRI.createVirtualRegister(RC) : Src2);
+ unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);
+
+ // Insert 3 basic blocks for the loop.
+ MachineBasicBlock *StartMBB = MBB;
+ MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
+ MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
+ MachineBasicBlock *UseAltMBB = emitBlockAfter(LoopMBB);
+ MachineBasicBlock *UpdateMBB = emitBlockAfter(UseAltMBB);
+
+ // StartMBB:
+ // ...
+ // %OrigVal = L Disp(%Base)
+ // # fall through to LoopMBB
+ MBB = StartMBB;
+ BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
+ MBB->addSuccessor(LoopMBB);
+
+ // LoopMBB:
+ // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
+ // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
+ // CompareOpcode %RotatedOldVal, %Src2
+ // BRC KeepOldMask, UpdateMBB
+ MBB = LoopMBB;
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
+ .addReg(OrigVal).addMBB(StartMBB)
+ .addReg(Dest).addMBB(UpdateMBB);
+ if (IsSubWord)
+ BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
+ .addReg(OldVal).addReg(BitShift).addImm(0);
+ BuildMI(MBB, DL, TII->get(CompareOpcode))
+ .addReg(RotatedOldVal).addReg(Src2);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
+ MBB->addSuccessor(UpdateMBB);
+ MBB->addSuccessor(UseAltMBB);
+
+ // UseAltMBB:
+ // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
+ // # fall through to UpdateMBB
+ MBB = UseAltMBB;
+ if (IsSubWord)
+ BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
+ .addReg(RotatedOldVal).addReg(Src2)
+ .addImm(32).addImm(31 + BitSize).addImm(0);
+ MBB->addSuccessor(UpdateMBB);
+
+ // UpdateMBB:
+ // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
+ // [ %RotatedAltVal, UseAltMBB ]
+ // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
+ // %Dest = CS %OldVal, %NewVal, Disp(%Base)
+ // JNE LoopMBB
+ // # fall through to DoneMBB
+ MBB = UpdateMBB;
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
+ .addReg(RotatedOldVal).addMBB(LoopMBB)
+ .addReg(RotatedAltVal).addMBB(UseAltMBB);
+ if (IsSubWord)
+ BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
+ .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
+ BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
+ .addReg(OldVal)
+ .addReg(NewVal)
+ .add(Base)
+ .addImm(Disp);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
+ MBB->addSuccessor(LoopMBB);
+ MBB->addSuccessor(DoneMBB);
+
+ MI.eraseFromParent();
+ return DoneMBB;
+}
+
+// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_CMP_SWAPW
+// instruction MI.
+MachineBasicBlock *
+SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
+ MachineBasicBlock *MBB) const {
+
+ MachineFunction &MF = *MBB->getParent();
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ // Extract the operands. Base can be a register or a frame index.
+ unsigned Dest = MI.getOperand(0).getReg();
+ MachineOperand Base = earlyUseOperand(MI.getOperand(1));
+ int64_t Disp = MI.getOperand(2).getImm();
+ unsigned OrigCmpVal = MI.getOperand(3).getReg();
+ unsigned OrigSwapVal = MI.getOperand(4).getReg();
+ unsigned BitShift = MI.getOperand(5).getReg();
+ unsigned NegBitShift = MI.getOperand(6).getReg();
+ int64_t BitSize = MI.getOperand(7).getImm();
+ DebugLoc DL = MI.getDebugLoc();
+
+ const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
+
+ // Get the right opcodes for the displacement.
+ unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
+ unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
+ assert(LOpcode && CSOpcode && "Displacement out of range");
+
+ // Create virtual registers for temporary results.
+ unsigned OrigOldVal = MRI.createVirtualRegister(RC);
+ unsigned OldVal = MRI.createVirtualRegister(RC);
+ unsigned CmpVal = MRI.createVirtualRegister(RC);
+ unsigned SwapVal = MRI.createVirtualRegister(RC);
+ unsigned StoreVal = MRI.createVirtualRegister(RC);
+ unsigned RetryOldVal = MRI.createVirtualRegister(RC);
+ unsigned RetryCmpVal = MRI.createVirtualRegister(RC);
+ unsigned RetrySwapVal = MRI.createVirtualRegister(RC);
+
+ // Insert 2 basic blocks for the loop.
+ MachineBasicBlock *StartMBB = MBB;
+ MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
+ MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
+ MachineBasicBlock *SetMBB = emitBlockAfter(LoopMBB);
+
+ // StartMBB:
+ // ...
+ // %OrigOldVal = L Disp(%Base)
+ // # fall through to LoopMBB
+ MBB = StartMBB;
+ BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
+ .add(Base)
+ .addImm(Disp)
+ .addReg(0);
+ MBB->addSuccessor(LoopMBB);
+
+ // LoopMBB:
+ // %OldVal = phi [ %OrigOldVal, StartMBB ], [ %RetryOldVal, SetMBB ]
+ // %CmpVal = phi [ %OrigCmpVal, StartMBB ], [ %RetryCmpVal, SetMBB ]
+ // %SwapVal = phi [ %OrigSwapVal, StartMBB ], [ %RetrySwapVal, SetMBB ]
+ // %Dest = RLL %OldVal, BitSize(%BitShift)
+ // ^^ The low BitSize bits contain the field
+ // of interest.
+ // %RetryCmpVal = RISBG32 %CmpVal, %Dest, 32, 63-BitSize, 0
+ // ^^ Replace the upper 32-BitSize bits of the
+ // comparison value with those that we loaded,
+ // so that we can use a full word comparison.
+ // CR %Dest, %RetryCmpVal
+ // JNE DoneMBB
+ // # Fall through to SetMBB
+ MBB = LoopMBB;
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
+ .addReg(OrigOldVal).addMBB(StartMBB)
+ .addReg(RetryOldVal).addMBB(SetMBB);
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), CmpVal)
+ .addReg(OrigCmpVal).addMBB(StartMBB)
+ .addReg(RetryCmpVal).addMBB(SetMBB);
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
+ .addReg(OrigSwapVal).addMBB(StartMBB)
+ .addReg(RetrySwapVal).addMBB(SetMBB);
+ BuildMI(MBB, DL, TII->get(SystemZ::RLL), Dest)
+ .addReg(OldVal).addReg(BitShift).addImm(BitSize);
+ BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetryCmpVal)
+ .addReg(CmpVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
+ BuildMI(MBB, DL, TII->get(SystemZ::CR))
+ .addReg(Dest).addReg(RetryCmpVal);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_ICMP)
+ .addImm(SystemZ::CCMASK_CMP_NE).addMBB(DoneMBB);
+ MBB->addSuccessor(DoneMBB);
+ MBB->addSuccessor(SetMBB);
+
+ // SetMBB:
+ // %RetrySwapVal = RISBG32 %SwapVal, %Dest, 32, 63-BitSize, 0
+ // ^^ Replace the upper 32-BitSize bits of the new
+ // value with those that we loaded.
+ // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift)
+ // ^^ Rotate the new field to its proper position.
+ // %RetryOldVal = CS %Dest, %StoreVal, Disp(%Base)
+ // JNE LoopMBB
+ // # fall through to DoneMBB
+ MBB = SetMBB;
+ BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
+ .addReg(SwapVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
+ BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
+ .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
+ BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
+ .addReg(OldVal)
+ .addReg(StoreVal)
+ .add(Base)
+ .addImm(Disp);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
+ MBB->addSuccessor(LoopMBB);
+ MBB->addSuccessor(DoneMBB);
+
+ MI.eraseFromParent();
+ return DoneMBB;
+}
+
+// Emit an extension from a GR32 or GR64 to a GR128. ClearEven is true
+// if the high register of the GR128 value must be cleared or false if
+// it's "don't care". SubReg is subreg_l32 when extending a GR32
+// and subreg_l64 when extending a GR64.
+MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
+ MachineBasicBlock *MBB,
+ bool ClearEven,
+ unsigned SubReg) const {
+ MachineFunction &MF = *MBB->getParent();
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned Dest = MI.getOperand(0).getReg();
+ unsigned Src = MI.getOperand(1).getReg();
+ unsigned In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
+
+ BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
+ if (ClearEven) {
+ unsigned NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
+ unsigned Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
+
+ BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
+ .addImm(0);
+ BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
+ .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
+ In128 = NewIn128;
+ }
+ BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
+ .addReg(In128).addReg(Src).addImm(SubReg);
+
+ MI.eraseFromParent();
+ return MBB;
+}
+
+MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
+ MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
+ MachineFunction &MF = *MBB->getParent();
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ DebugLoc DL = MI.getDebugLoc();
+
+ MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
+ uint64_t DestDisp = MI.getOperand(1).getImm();
+ MachineOperand SrcBase = earlyUseOperand(MI.getOperand(2));
+ uint64_t SrcDisp = MI.getOperand(3).getImm();
+ uint64_t Length = MI.getOperand(4).getImm();
+
+ // When generating more than one CLC, all but the last will need to
+ // branch to the end when a difference is found.
+ MachineBasicBlock *EndMBB = (Length > 256 && Opcode == SystemZ::CLC ?
+ splitBlockAfter(MI, MBB) : nullptr);
+
+ // Check for the loop form, in which operand 5 is the trip count.
+ if (MI.getNumExplicitOperands() > 5) {
+ bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
+
+ uint64_t StartCountReg = MI.getOperand(5).getReg();
+ uint64_t StartSrcReg = forceReg(MI, SrcBase, TII);
+ uint64_t StartDestReg = (HaveSingleBase ? StartSrcReg :
+ forceReg(MI, DestBase, TII));
+
+ const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
+ uint64_t ThisSrcReg = MRI.createVirtualRegister(RC);
+ uint64_t ThisDestReg = (HaveSingleBase ? ThisSrcReg :
+ MRI.createVirtualRegister(RC));
+ uint64_t NextSrcReg = MRI.createVirtualRegister(RC);
+ uint64_t NextDestReg = (HaveSingleBase ? NextSrcReg :
+ MRI.createVirtualRegister(RC));
+
+ RC = &SystemZ::GR64BitRegClass;
+ uint64_t ThisCountReg = MRI.createVirtualRegister(RC);
+ uint64_t NextCountReg = MRI.createVirtualRegister(RC);
+
+ MachineBasicBlock *StartMBB = MBB;
+ MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
+ MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
+ MachineBasicBlock *NextMBB = (EndMBB ? emitBlockAfter(LoopMBB) : LoopMBB);
+
+ // StartMBB:
+ // # fall through to LoopMBB
+ MBB->addSuccessor(LoopMBB);
+
+ // LoopMBB:
+ // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
+ // [ %NextDestReg, NextMBB ]
+ // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
+ // [ %NextSrcReg, NextMBB ]
+ // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
+ // [ %NextCountReg, NextMBB ]
+ // ( PFD 2, 768+DestDisp(%ThisDestReg) )
+ // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
+ // ( JLH EndMBB )
+ //
+ // The prefetch is used only for MVC. The JLH is used only for CLC.
+ MBB = LoopMBB;
+
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
+ .addReg(StartDestReg).addMBB(StartMBB)
+ .addReg(NextDestReg).addMBB(NextMBB);
+ if (!HaveSingleBase)
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
+ .addReg(StartSrcReg).addMBB(StartMBB)
+ .addReg(NextSrcReg).addMBB(NextMBB);
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
+ .addReg(StartCountReg).addMBB(StartMBB)
+ .addReg(NextCountReg).addMBB(NextMBB);
+ if (Opcode == SystemZ::MVC)
+ BuildMI(MBB, DL, TII->get(SystemZ::PFD))
+ .addImm(SystemZ::PFD_WRITE)
+ .addReg(ThisDestReg).addImm(DestDisp + 768).addReg(0);
+ BuildMI(MBB, DL, TII->get(Opcode))
+ .addReg(ThisDestReg).addImm(DestDisp).addImm(256)
+ .addReg(ThisSrcReg).addImm(SrcDisp);
+ if (EndMBB) {
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
+ .addMBB(EndMBB);
+ MBB->addSuccessor(EndMBB);
+ MBB->addSuccessor(NextMBB);
+ }
+
+ // NextMBB:
+ // %NextDestReg = LA 256(%ThisDestReg)
+ // %NextSrcReg = LA 256(%ThisSrcReg)
+ // %NextCountReg = AGHI %ThisCountReg, -1
+ // CGHI %NextCountReg, 0
+ // JLH LoopMBB
+ // # fall through to DoneMBB
+ //
+ // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
+ MBB = NextMBB;
+
+ BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
+ .addReg(ThisDestReg).addImm(256).addReg(0);
+ if (!HaveSingleBase)
+ BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
+ .addReg(ThisSrcReg).addImm(256).addReg(0);
+ BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
+ .addReg(ThisCountReg).addImm(-1);
+ BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
+ .addReg(NextCountReg).addImm(0);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
+ .addMBB(LoopMBB);
+ MBB->addSuccessor(LoopMBB);
+ MBB->addSuccessor(DoneMBB);
+
+ DestBase = MachineOperand::CreateReg(NextDestReg, false);
+ SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
+ Length &= 255;
+ MBB = DoneMBB;
+ }
+ // Handle any remaining bytes with straight-line code.
+ while (Length > 0) {
+ uint64_t ThisLength = std::min(Length, uint64_t(256));
+ // The previous iteration might have created out-of-range displacements.
+ // If so, materialize the base address with LAY so that the displacement
+ // becomes zero.
+ if (!isUInt<12>(DestDisp)) {
+ unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+ BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg)
+ .add(DestBase)
+ .addImm(DestDisp)
+ .addReg(0);
+ DestBase = MachineOperand::CreateReg(Reg, false);
+ DestDisp = 0;
+ }
+ if (!isUInt<12>(SrcDisp)) {
+ unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+ BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg)
+ .add(SrcBase)
+ .addImm(SrcDisp)
+ .addReg(0);
+ SrcBase = MachineOperand::CreateReg(Reg, false);
+ SrcDisp = 0;
+ }
+ BuildMI(*MBB, MI, DL, TII->get(Opcode))
+ .add(DestBase)
+ .addImm(DestDisp)
+ .addImm(ThisLength)
+ .add(SrcBase)
+ .addImm(SrcDisp)
+ ->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ DestDisp += ThisLength;
+ SrcDisp += ThisLength;
+ Length -= ThisLength;
+ // If there's another CLC to go, branch to the end if a difference
+ // was found.
+ if (EndMBB && Length > 0) {
+ MachineBasicBlock *NextMBB = splitBlockBefore(MI, MBB);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
+ .addMBB(EndMBB);
+ MBB->addSuccessor(EndMBB);
+ MBB->addSuccessor(NextMBB);
+ MBB = NextMBB;
+ }
+ }
+ if (EndMBB) {
+ MBB->addSuccessor(EndMBB);
+ MBB = EndMBB;
+ MBB->addLiveIn(SystemZ::CC);
+ }
+
+ MI.eraseFromParent();
+ return MBB;
+}
+
+// Decompose string pseudo-instruction MI into a loop that repeatedly performs
+// Opcode until CC != 3.
+MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
+ MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
+ MachineFunction &MF = *MBB->getParent();
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ DebugLoc DL = MI.getDebugLoc();
+
+ uint64_t End1Reg = MI.getOperand(0).getReg();
+ uint64_t Start1Reg = MI.getOperand(1).getReg();
+ uint64_t Start2Reg = MI.getOperand(2).getReg();
+ uint64_t CharReg = MI.getOperand(3).getReg();
+
+ const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
+ uint64_t This1Reg = MRI.createVirtualRegister(RC);
+ uint64_t This2Reg = MRI.createVirtualRegister(RC);
+ uint64_t End2Reg = MRI.createVirtualRegister(RC);
+
+ MachineBasicBlock *StartMBB = MBB;
+ MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
+ MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
+
+ // StartMBB:
+ // # fall through to LoopMBB
+ MBB->addSuccessor(LoopMBB);
+
+ // LoopMBB:
+ // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
+ // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
+ // R0L = %CharReg
+ // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
+ // JO LoopMBB
+ // # fall through to DoneMBB
+ //
+ // The load of R0L can be hoisted by post-RA LICM.
+ MBB = LoopMBB;
+
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
+ .addReg(Start1Reg).addMBB(StartMBB)
+ .addReg(End1Reg).addMBB(LoopMBB);
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
+ .addReg(Start2Reg).addMBB(StartMBB)
+ .addReg(End2Reg).addMBB(LoopMBB);
+ BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
+ BuildMI(MBB, DL, TII->get(Opcode))
+ .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
+ .addReg(This1Reg).addReg(This2Reg);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB);
+ MBB->addSuccessor(LoopMBB);
+ MBB->addSuccessor(DoneMBB);
+
+ DoneMBB->addLiveIn(SystemZ::CC);
+
+ MI.eraseFromParent();
+ return DoneMBB;
+}
+
+// Update TBEGIN instruction with final opcode and register clobbers.
+MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
+ MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
+ bool NoFloat) const {
+ MachineFunction &MF = *MBB->getParent();
+ const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
+ const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
+
+ // Update opcode.
+ MI.setDesc(TII->get(Opcode));
+
+ // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
+ // Make sure to add the corresponding GRSM bits if they are missing.
+ uint64_t Control = MI.getOperand(2).getImm();
+ static const unsigned GPRControlBit[16] = {
+ 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
+ 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
+ };
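+ // Each even/odd GPR pair shares one GRSM bit: GPRControlBit[15] (0x0100)
+ // covers the %r14/%r15 pair that includes the stack pointer, and
+ // GPRControlBit[11] (0x0400) covers the %r10/%r11 pair that includes the
+ // frame pointer when one is in use.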
+ Control |= GPRControlBit[15];
+ if (TFI->hasFP(MF))
+ Control |= GPRControlBit[11];
+ MI.getOperand(2).setImm(Control);
+
+ // Add GPR clobbers.
+ for (int I = 0; I < 16; I++) {
+ if ((Control & GPRControlBit[I]) == 0) {
+ unsigned Reg = SystemZMC::GR64Regs[I];
+ MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
+ }
+ }
+
+ // Add FPR/VR clobbers.
+ if (!NoFloat && (Control & 4) != 0) {
+ if (Subtarget.hasVector()) {
+ for (int I = 0; I < 32; I++) {
+ unsigned Reg = SystemZMC::VR128Regs[I];
+ MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
+ }
+ } else {
+ for (int I = 0; I < 16; I++) {
+ unsigned Reg = SystemZMC::FP64Regs[I];
+ MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
+ }
+ }
+ }
+
+ return MBB;
+}
+
+MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
+ MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
+ MachineFunction &MF = *MBB->getParent();
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned SrcReg = MI.getOperand(0).getReg();
+
+ // Create new virtual register of the same class as source.
+ const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
+ unsigned DstReg = MRI->createVirtualRegister(RC);
+
+ // Replace pseudo with a normal load-and-test that models the def as
+ // well.
+ BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
+ .addReg(SrcReg);
+ MI.eraseFromParent();
+
+ return MBB;
+}
+
+MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
+ MachineInstr &MI, MachineBasicBlock *MBB) const {
+ switch (MI.getOpcode()) {
+ case SystemZ::Select32Mux:
+ return emitSelect(MI, MBB,
+ Subtarget.hasLoadStoreOnCond2()? SystemZ::LOCRMux : 0);
+ case SystemZ::Select32:
+ return emitSelect(MI, MBB, SystemZ::LOCR);
+ case SystemZ::Select64:
+ return emitSelect(MI, MBB, SystemZ::LOCGR);
+ case SystemZ::SelectF32:
+ case SystemZ::SelectF64:
+ case SystemZ::SelectF128:
+ return emitSelect(MI, MBB, 0);
+
+ case SystemZ::CondStore8Mux:
+ return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
+ case SystemZ::CondStore8MuxInv:
+ return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
+ case SystemZ::CondStore16Mux:
+ return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
+ case SystemZ::CondStore16MuxInv:
+ return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
+ case SystemZ::CondStore32Mux:
+ return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
+ case SystemZ::CondStore32MuxInv:
+ return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
+ case SystemZ::CondStore8:
+ return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
+ case SystemZ::CondStore8Inv:
+ return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
+ case SystemZ::CondStore16:
+ return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
+ case SystemZ::CondStore16Inv:
+ return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
+ case SystemZ::CondStore32:
+ return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
+ case SystemZ::CondStore32Inv:
+ return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
+ case SystemZ::CondStore64:
+ return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
+ case SystemZ::CondStore64Inv:
+ return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
+ case SystemZ::CondStoreF32:
+ return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
+ case SystemZ::CondStoreF32Inv:
+ return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
+ case SystemZ::CondStoreF64:
+ return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
+ case SystemZ::CondStoreF64Inv:
+ return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
+
+ case SystemZ::AEXT128_64:
+ return emitExt128(MI, MBB, false, SystemZ::subreg_l64);
+ case SystemZ::ZEXT128_32:
+ return emitExt128(MI, MBB, true, SystemZ::subreg_l32);
+ case SystemZ::ZEXT128_64:
+ return emitExt128(MI, MBB, true, SystemZ::subreg_l64);
+
+ case SystemZ::ATOMIC_SWAPW:
+ return emitAtomicLoadBinary(MI, MBB, 0, 0);
+ case SystemZ::ATOMIC_SWAP_32:
+ return emitAtomicLoadBinary(MI, MBB, 0, 32);
+ case SystemZ::ATOMIC_SWAP_64:
+ return emitAtomicLoadBinary(MI, MBB, 0, 64);
+
+ case SystemZ::ATOMIC_LOADW_AR:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 0);
+ case SystemZ::ATOMIC_LOADW_AFI:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 0);
+ case SystemZ::ATOMIC_LOAD_AR:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 32);
+ case SystemZ::ATOMIC_LOAD_AHI:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::AHI, 32);
+ case SystemZ::ATOMIC_LOAD_AFI:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 32);
+ case SystemZ::ATOMIC_LOAD_AGR:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::AGR, 64);
+ case SystemZ::ATOMIC_LOAD_AGHI:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::AGHI, 64);
+ case SystemZ::ATOMIC_LOAD_AGFI:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::AGFI, 64);
+
+ case SystemZ::ATOMIC_LOADW_SR:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 0);
+ case SystemZ::ATOMIC_LOAD_SR:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 32);
+ case SystemZ::ATOMIC_LOAD_SGR:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::SGR, 64);
+
+ case SystemZ::ATOMIC_LOADW_NR:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0);
+ case SystemZ::ATOMIC_LOADW_NILH:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0);
+ case SystemZ::ATOMIC_LOAD_NR:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32);
+ case SystemZ::ATOMIC_LOAD_NILL:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32);
+ case SystemZ::ATOMIC_LOAD_NILH:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32);
+ case SystemZ::ATOMIC_LOAD_NILF:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32);
+ case SystemZ::ATOMIC_LOAD_NGR:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64);
+ case SystemZ::ATOMIC_LOAD_NILL64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64);
+ case SystemZ::ATOMIC_LOAD_NILH64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64);
+ case SystemZ::ATOMIC_LOAD_NIHL64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64);
+ case SystemZ::ATOMIC_LOAD_NIHH64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64);
+ case SystemZ::ATOMIC_LOAD_NILF64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64);
+ case SystemZ::ATOMIC_LOAD_NIHF64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64);
+
+ case SystemZ::ATOMIC_LOADW_OR:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 0);
+ case SystemZ::ATOMIC_LOADW_OILH:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 0);
+ case SystemZ::ATOMIC_LOAD_OR:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 32);
+ case SystemZ::ATOMIC_LOAD_OILL:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL, 32);
+ case SystemZ::ATOMIC_LOAD_OILH:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 32);
+ case SystemZ::ATOMIC_LOAD_OILF:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF, 32);
+ case SystemZ::ATOMIC_LOAD_OGR:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OGR, 64);
+ case SystemZ::ATOMIC_LOAD_OILL64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL64, 64);
+ case SystemZ::ATOMIC_LOAD_OILH64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH64, 64);
+ case SystemZ::ATOMIC_LOAD_OIHL64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHL64, 64);
+ case SystemZ::ATOMIC_LOAD_OIHH64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHH64, 64);
+ case SystemZ::ATOMIC_LOAD_OILF64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF64, 64);
+ case SystemZ::ATOMIC_LOAD_OIHF64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHF64, 64);
+
+ case SystemZ::ATOMIC_LOADW_XR:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 0);
+ case SystemZ::ATOMIC_LOADW_XILF:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 0);
+ case SystemZ::ATOMIC_LOAD_XR:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 32);
+ case SystemZ::ATOMIC_LOAD_XILF:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 32);
+ case SystemZ::ATOMIC_LOAD_XGR:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::XGR, 64);
+ case SystemZ::ATOMIC_LOAD_XILF64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF64, 64);
+ case SystemZ::ATOMIC_LOAD_XIHF64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::XIHF64, 64);
+
+ case SystemZ::ATOMIC_LOADW_NRi:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0, true);
+ case SystemZ::ATOMIC_LOADW_NILHi:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0, true);
+ case SystemZ::ATOMIC_LOAD_NRi:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32, true);
+ case SystemZ::ATOMIC_LOAD_NILLi:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32, true);
+ case SystemZ::ATOMIC_LOAD_NILHi:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32, true);
+ case SystemZ::ATOMIC_LOAD_NILFi:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32, true);
+ case SystemZ::ATOMIC_LOAD_NGRi:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64, true);
+ case SystemZ::ATOMIC_LOAD_NILL64i:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64, true);
+ case SystemZ::ATOMIC_LOAD_NILH64i:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64, true);
+ case SystemZ::ATOMIC_LOAD_NIHL64i:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64, true);
+ case SystemZ::ATOMIC_LOAD_NIHH64i:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64, true);
+ case SystemZ::ATOMIC_LOAD_NILF64i:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64, true);
+ case SystemZ::ATOMIC_LOAD_NIHF64i:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64, true);
+
+ case SystemZ::ATOMIC_LOADW_MIN:
+ return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
+ SystemZ::CCMASK_CMP_LE, 0);
+ case SystemZ::ATOMIC_LOAD_MIN_32:
+ return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
+ SystemZ::CCMASK_CMP_LE, 32);
+ case SystemZ::ATOMIC_LOAD_MIN_64:
+ return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR,
+ SystemZ::CCMASK_CMP_LE, 64);
+
+ case SystemZ::ATOMIC_LOADW_MAX:
+ return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
+ SystemZ::CCMASK_CMP_GE, 0);
+ case SystemZ::ATOMIC_LOAD_MAX_32:
+ return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
+ SystemZ::CCMASK_CMP_GE, 32);
+ case SystemZ::ATOMIC_LOAD_MAX_64:
+ return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR,
+ SystemZ::CCMASK_CMP_GE, 64);
+
+ case SystemZ::ATOMIC_LOADW_UMIN:
+ return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
+ SystemZ::CCMASK_CMP_LE, 0);
+ case SystemZ::ATOMIC_LOAD_UMIN_32:
+ return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
+ SystemZ::CCMASK_CMP_LE, 32);
+ case SystemZ::ATOMIC_LOAD_UMIN_64:
+ return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR,
+ SystemZ::CCMASK_CMP_LE, 64);
+
+ case SystemZ::ATOMIC_LOADW_UMAX:
+ return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
+ SystemZ::CCMASK_CMP_GE, 0);
+ case SystemZ::ATOMIC_LOAD_UMAX_32:
+ return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
+ SystemZ::CCMASK_CMP_GE, 32);
+ case SystemZ::ATOMIC_LOAD_UMAX_64:
+ return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR,
+ SystemZ::CCMASK_CMP_GE, 64);
+
+ case SystemZ::ATOMIC_CMP_SWAPW:
+ return emitAtomicCmpSwapW(MI, MBB);
+ case SystemZ::MVCSequence:
+ case SystemZ::MVCLoop:
+ return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
+ case SystemZ::NCSequence:
+ case SystemZ::NCLoop:
+ return emitMemMemWrapper(MI, MBB, SystemZ::NC);
+ case SystemZ::OCSequence:
+ case SystemZ::OCLoop:
+ return emitMemMemWrapper(MI, MBB, SystemZ::OC);
+ case SystemZ::XCSequence:
+ case SystemZ::XCLoop:
+ return emitMemMemWrapper(MI, MBB, SystemZ::XC);
+ case SystemZ::CLCSequence:
+ case SystemZ::CLCLoop:
+ return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
+ case SystemZ::CLSTLoop:
+ return emitStringWrapper(MI, MBB, SystemZ::CLST);
+ case SystemZ::MVSTLoop:
+ return emitStringWrapper(MI, MBB, SystemZ::MVST);
+ case SystemZ::SRSTLoop:
+ return emitStringWrapper(MI, MBB, SystemZ::SRST);
+ case SystemZ::TBEGIN:
+ return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
+ case SystemZ::TBEGIN_nofloat:
+ return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
+ case SystemZ::TBEGINC:
+ return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
+ case SystemZ::LTEBRCompare_VecPseudo:
+ return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
+ case SystemZ::LTDBRCompare_VecPseudo:
+ return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
+ case SystemZ::LTXBRCompare_VecPseudo:
+ return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
+
+ default:
+ llvm_unreachable("Unexpected instr type to insert");
+ }
+}
+
+// This is only used by the isel schedulers, and is needed only to prevent
+// the compiler from crashing when list-ilp is used.
+const TargetRegisterClass *
+SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
+ if (VT == MVT::Untyped)
+ return &SystemZ::ADDR128BitRegClass;
+ return TargetLowering::getRepRegClassFor(VT);
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
new file mode 100644
index 000000000000..5dcb19c0a35d
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -0,0 +1,592 @@
+//===-- SystemZISelLowering.h - SystemZ DAG lowering interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that SystemZ uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZISELLOWERING_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZISELLOWERING_H
+
+#include "SystemZ.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+namespace SystemZISD {
+enum NodeType : unsigned {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ // Return with a flag operand. Operand 0 is the chain operand.
+ RET_FLAG,
+
+ // Calls a function. Operand 0 is the chain operand and operand 1
+ // is the target address. The arguments start at operand 2.
+ // There is an optional glue operand at the end.
+ CALL,
+ SIBCALL,
+
+ // TLS calls. Like regular calls, except operand 1 is the TLS symbol.
+ // (The call target is implicitly __tls_get_offset.)
+ TLS_GDCALL,
+ TLS_LDCALL,
+
+ // Wraps a TargetGlobalAddress that should be loaded using PC-relative
+ // accesses (LARL). Operand 0 is the address.
+ PCREL_WRAPPER,
+
+ // Used in cases where an offset is applied to a TargetGlobalAddress.
+ // Operand 0 is the full TargetGlobalAddress and operand 1 is a
+ // PCREL_WRAPPER for an anchor point. This is used so that we can
+ // cheaply refer to either the full address or the anchor point
+ // as a register base.
+ PCREL_OFFSET,
+
+ // Integer absolute.
+ IABS,
+
+ // Integer comparisons. There are three operands: the two values
+ // to compare, and an integer of type SystemZICMP.
+ ICMP,
+
+ // Floating-point comparisons. The two operands are the values to compare.
+ FCMP,
+
+ // Test under mask. The first operand is ANDed with the second operand
+ // and the condition codes are set on the result. The third operand is
+ // a boolean that is true if the condition codes need to distinguish
+ // between CCMASK_TM_MIXED_MSB_0 and CCMASK_TM_MIXED_MSB_1 (which the
+ // register forms do but the memory forms don't).
+ TM,
+
+ // Branches if a condition is true. Operand 0 is the chain operand;
+ // operand 1 is the 4-bit condition-code mask, with bit N in
+ // big-endian order meaning "branch if CC=N"; operand 2 is the
+ // target block and operand 3 is the flag operand.
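+ // (For example, a mask of 0x8 has bit 0 set in big-endian order and
+ // therefore means "branch if CC=0".)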
+ BR_CCMASK,
+
+ // Selects between operand 0 and operand 1. Operand 2 is the
+ // mask of condition-code values for which operand 0 should be
+ // chosen over operand 1; it has the same form as BR_CCMASK.
+ // Operand 3 is the flag operand.
+ SELECT_CCMASK,
+
+ // Evaluates to the gap between the stack pointer and the
+ // base of the dynamically-allocatable area.
+ ADJDYNALLOC,
+
+ // Count the number of bits set in each byte of operand 0.
+ POPCNT,
+
+ // Wrappers around the ISD opcodes of the same name. The output and
+ // first input operands are GR128s. The trailing numbers are the
+ // widths of the second operand in bits.
+ UMUL_LOHI64,
+ SDIVREM32,
+ SDIVREM64,
+ UDIVREM32,
+ UDIVREM64,
+
+ // Use a series of MVCs to copy bytes from one memory location to another.
+ // The operands are:
+ // - the target address
+ // - the source address
+ // - the constant length
+ //
+ // This isn't a memory opcode because we'd need to attach two
+ // MachineMemOperands rather than one.
+ MVC,
+
+ // Like MVC, but implemented as a loop that handles X*256 bytes
+ // followed by straight-line code to handle the rest (if any).
+ // The value of X is passed as an additional operand.
+ MVC_LOOP,
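+ // (Illustration: a 300-byte constant-length copy could become one loop
+ // iteration covering 256 bytes plus a straight-line MVC of the
+ // remaining 44 bytes.)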
+
+ // Similar to MVC and MVC_LOOP, but for logic operations (AND, OR, XOR).
+ NC,
+ NC_LOOP,
+ OC,
+ OC_LOOP,
+ XC,
+ XC_LOOP,
+
+ // Use CLC to compare two blocks of memory, with the same comments
+ // as for MVC and MVC_LOOP.
+ CLC,
+ CLC_LOOP,
+
+ // Use an MVST-based sequence to implement stpcpy().
+ STPCPY,
+
+ // Use a CLST-based sequence to implement strcmp(). The two input operands
+ // are the addresses of the strings to compare.
+ STRCMP,
+
+ // Use an SRST-based sequence to search a block of memory. The first
+ // operand is the end address, the second is the start, and the third
+ // is the character to search for. CC is set to 1 on success and 2
+ // on failure.
+ SEARCH_STRING,
+
+ // Store the CC value in bits 29 and 28 of an integer.
+ IPM,
+
+ // Compiler barrier only; generate a no-op.
+ MEMBARRIER,
+
+ // Transaction begin. The first operand is the chain, the second
+ // the TDB pointer, and the third the immediate control field.
+ // Returns chain and glue.
+ TBEGIN,
+ TBEGIN_NOFLOAT,
+
+ // Transaction end. Just the chain operand. Returns chain and glue.
+ TEND,
+
+ // Create a vector constant by filling byte N of the result with bit
+ // 15-N of the single operand.
+ BYTE_MASK,
+
+ // Create a vector constant by replicating an element-sized RISBG-style mask.
+ // The first operand specifies the starting set bit and the second operand
+ // specifies the ending set bit. Both operands count from the MSB of the
+ // element.
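+ // (For a 64-bit element, operands (1, 62) would set bits 1 through 62
+ // counting from the MSB, producing 0x7ffffffffffffffe.)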
+ ROTATE_MASK,
+
+ // Replicate a GPR scalar value into all elements of a vector.
+ REPLICATE,
+
+ // Create a vector from two i64 GPRs.
+ JOIN_DWORDS,
+
+ // Replicate one element of a vector into all elements. The first operand
+ // is the vector and the second is the index of the element to replicate.
+ SPLAT,
+
+ // Interleave elements from the high half of operand 0 and the high half
+ // of operand 1.
+ MERGE_HIGH,
+
+ // Likewise for the low halves.
+ MERGE_LOW,
+
+ // Concatenate the vectors in the first two operands, shift them left
+ // by the third operand, and take the first half of the result.
+ SHL_DOUBLE,
+
+ // Take one element of the first v2i64 operand and one element of the
+ // second v2i64 operand and concatenate them to form a v2i64 result.
+ // The third operand is a 4-bit value of the form 0A0B, where A and B
+ // are the element selectors for the first and second operands
+ // respectively.
+ PERMUTE_DWORDS,
+
+ // Perform a general vector permute on vector operands 0 and 1.
+ // Each byte of operand 2 controls the corresponding byte of the result,
+ // in the same way as a byte-level VECTOR_SHUFFLE mask.
+ PERMUTE,
+
+ // Pack vector operands 0 and 1 into a single vector with half-sized elements.
+ PACK,
+
+ // Likewise, but saturate the result and set CC. PACKS_CC does signed
+ // saturation and PACKLS_CC does unsigned saturation.
+ PACKS_CC,
+ PACKLS_CC,
+
+ // Unpack the first half of vector operand 0 into double-sized elements.
+ // UNPACK_HIGH sign-extends and UNPACKL_HIGH zero-extends.
+ UNPACK_HIGH,
+ UNPACKL_HIGH,
+
+ // Likewise for the second half.
+ UNPACK_LOW,
+ UNPACKL_LOW,
+
+ // Shift each element of vector operand 0 by the number of bits specified
+ // by scalar operand 1.
+ VSHL_BY_SCALAR,
+ VSRL_BY_SCALAR,
+ VSRA_BY_SCALAR,
+
+ // For each element of the output type, sum across all sub-elements of
+ // operand 0 belonging to the corresponding element, and add in the
+ // rightmost sub-element of the corresponding element of operand 1.
+ VSUM,
+
+ // Compare integer vector operands 0 and 1 to produce the usual 0/-1
+ // vector result. VICMPE is for equality, VICMPH for "signed greater than"
+ // and VICMPHL for "unsigned greater than".
+ VICMPE,
+ VICMPH,
+ VICMPHL,
+
+ // Likewise, but also set the condition codes on the result.
+ VICMPES,
+ VICMPHS,
+ VICMPHLS,
+
+ // Compare floating-point vector operands 0 and 1 to produce the usual 0/-1
+ // vector result. VFCMPE is for "ordered and equal", VFCMPH for "ordered and
+ // greater than" and VFCMPHE for "ordered and greater than or equal to".
+ VFCMPE,
+ VFCMPH,
+ VFCMPHE,
+
+ // Likewise, but also set the condition codes on the result.
+ VFCMPES,
+ VFCMPHS,
+ VFCMPHES,
+
+ // Test floating-point data class for vectors.
+ VFTCI,
+
+ // Extend the even f32 elements of vector operand 0 to produce a vector
+ // of f64 elements.
+ VEXTEND,
+
+ // Round the f64 elements of vector operand 0 to f32s and store them in the
+ // even elements of the result.
+ VROUND,
+
+ // AND the two vector operands together and set CC based on the result.
+ VTM,
+
+ // String operations that set CC as a side-effect.
+ VFAE_CC,
+ VFAEZ_CC,
+ VFEE_CC,
+ VFEEZ_CC,
+ VFENE_CC,
+ VFENEZ_CC,
+ VISTR_CC,
+ VSTRC_CC,
+ VSTRCZ_CC,
+
+ // Test Data Class.
+ //
+ // Operand 0: the value to test
+ // Operand 1: the bit mask
+ TDC,
+
+ // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or
+ // ATOMIC_LOAD_<op>.
+ //
+ // Operand 0: the address of the containing 32-bit-aligned field
+ // Operand 1: the second operand of <op>, in the high bits of an i32
+ // for everything except ATOMIC_SWAPW
+ // Operand 2: how many bits to rotate the i32 left to bring the first
+ // operand into the high bits
+ // Operand 3: the negative of operand 2, for rotating the other way
+ // Operand 4: the width of the field in bits (8 or 16)
+ ATOMIC_SWAPW = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ ATOMIC_LOADW_ADD,
+ ATOMIC_LOADW_SUB,
+ ATOMIC_LOADW_AND,
+ ATOMIC_LOADW_OR,
+ ATOMIC_LOADW_XOR,
+ ATOMIC_LOADW_NAND,
+ ATOMIC_LOADW_MIN,
+ ATOMIC_LOADW_MAX,
+ ATOMIC_LOADW_UMIN,
+ ATOMIC_LOADW_UMAX,
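+
+ // Illustrative sketch, not a normative description: for an i8
+ // ATOMIC_LOADW_ADD at address A, operand 0 would be A with its low two
+ // bits cleared, operand 4 would be 8, and operands 2 and 3 would be
+ // rotate amounts derived from A's byte offset within that word, letting
+ // the compare-and-swap loop operate on the full aligned word.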
+
+ // A wrapper around the inner loop of an ATOMIC_CMP_SWAP.
+ //
+ // Operand 0: the address of the containing 32-bit-aligned field
+ // Operand 1: the compare value, in the low bits of an i32
+ // Operand 2: the swap value, in the low bits of an i32
+ // Operand 3: how many bits to rotate the i32 left to bring the first
+ // operand into the high bits
+ // Operand 4: the negative of operand 3, for rotating the other way
+ // Operand 5: the width of the field in bits (8 or 16)
+ ATOMIC_CMP_SWAPW,
+
+ // Byte swapping load.
+ //
+ // Operand 0: the address to load from
+ // Operand 1: the type of load (i16, i32, i64)
+ LRV,
+
+ // Byte swapping store.
+ //
+ // Operand 0: the value to store
+ // Operand 1: the address to store to
+ // Operand 2: the type of store (i16, i32, i64)
+ STRV,
+
+ // Prefetch from the second operand using the 4-bit control code in
+ // the first operand. The code is 1 for a load prefetch and 2 for
+ // a store prefetch.
+ PREFETCH
+};
+
+// Return true if OPCODE is some kind of PC-relative address.
+inline bool isPCREL(unsigned Opcode) {
+ return Opcode == PCREL_WRAPPER || Opcode == PCREL_OFFSET;
+}
+} // end namespace SystemZISD
+
+namespace SystemZICMP {
+// Describes whether an integer comparison needs to be signed or unsigned,
+// or whether either type is OK.
+enum {
+ Any,
+ UnsignedOnly,
+ SignedOnly
+};
+} // end namespace SystemZICMP
+
+class SystemZSubtarget;
+class SystemZTargetMachine;
+
+class SystemZTargetLowering : public TargetLowering {
+public:
+ explicit SystemZTargetLowering(const TargetMachine &TM,
+ const SystemZSubtarget &STI);
+
+ // Override TargetLowering.
+ MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
+ return MVT::i32;
+ }
+ MVT getVectorIdxTy(const DataLayout &DL) const override {
+ // Only the lower 12 bits of an element index are used, so we don't
+ // want to clobber the upper 32 bits of a GPR unnecessarily.
+ return MVT::i32;
+ }
+ TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT)
+ const override {
+ // Widen subvectors to the full width rather than promoting integer
+ // elements. This is better because:
+ //
+ // (a) it means that we can handle the ABI for passing and returning
+ // sub-128 vectors without having to handle them as legal types.
+ //
+ // (b) we don't have instructions to extend on load and truncate on store,
+ // so promoting the integers is less efficient.
+ //
+ // (c) there are no multiplication instructions for the widest integer
+ // type (v2i64).
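+ //
+ // For example, under this policy a v4i8 value is widened to v16i8
+ // rather than promoted to v4i32.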
+ if (VT.getScalarSizeInBits() % 8 == 0)
+ return TypeWidenVector;
+ return TargetLoweringBase::getPreferredVectorAction(VT);
+ }
+ EVT getSetCCResultType(const DataLayout &DL, LLVMContext &,
+ EVT) const override;
+ bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
+ bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
+ bool isLegalICmpImmediate(int64_t Imm) const override;
+ bool isLegalAddImmediate(int64_t Imm) const override;
+ bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
+ unsigned AS) const override;
+ bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) const override;
+ bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
+ unsigned Align,
+ bool *Fast) const override;
+ bool isTruncateFree(Type *, Type *) const override;
+ bool isTruncateFree(EVT, EVT) const override;
+ const char *getTargetNodeName(unsigned Opcode) const override;
+ std::pair<unsigned, const TargetRegisterClass *>
+ getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ StringRef Constraint, MVT VT) const override;
+ TargetLowering::ConstraintType
+ getConstraintType(StringRef Constraint) const override;
+ TargetLowering::ConstraintWeight
+ getSingleConstraintMatchWeight(AsmOperandInfo &info,
+ const char *constraint) const override;
+ void LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const override;
+
+ unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
+ if (ConstraintCode.size() == 1) {
+ switch(ConstraintCode[0]) {
+ default:
+ break;
+ case 'Q':
+ return InlineAsm::Constraint_Q;
+ case 'R':
+ return InlineAsm::Constraint_R;
+ case 'S':
+ return InlineAsm::Constraint_S;
+ case 'T':
+ return InlineAsm::Constraint_T;
+ }
+ }
+ return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
+ }
+
+ /// If a physical register, this returns the register that receives the
+ /// exception address on entry to an EH pad.
+ unsigned
+ getExceptionPointerRegister(const Constant *PersonalityFn) const override {
+ return SystemZ::R6D;
+ }
+
+ /// If a physical register, this returns the register that receives the
+ /// exception typeid on entry to a landing pad.
+ unsigned
+ getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
+ return SystemZ::R7D;
+ }
+
+ /// Override to support customized stack guard loading.
+ bool useLoadStackGuardNode() const override {
+ return true;
+ }
+ void insertSSPDeclarations(Module &M) const override {
+ }
+
+ MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr &MI,
+ MachineBasicBlock *BB) const override;
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+ bool allowTruncateForTailCall(Type *, Type *) const override;
+ bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
+ SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ const SDLoc &DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const override;
+ SDValue LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const override;
+
+ bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const override;
+ SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
+ SelectionDAG &DAG) const override;
+ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
+
+ ISD::NodeType getExtendForAtomicOps() const override {
+ return ISD::ANY_EXTEND;
+ }
+
+ bool supportSwiftError() const override {
+ return true;
+ }
+
+private:
+ const SystemZSubtarget &Subtarget;
+
+ // Implement LowerOperation for individual opcodes.
+ SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerGlobalAddress(GlobalAddressSDNode *Node,
+ SelectionDAG &DAG) const;
+ SDValue lowerTLSGetOffset(GlobalAddressSDNode *Node,
+ SelectionDAG &DAG, unsigned Opcode,
+ SDValue GOTOffset) const;
+ SDValue lowerThreadPointer(const SDLoc &DL, SelectionDAG &DAG) const;
+ SDValue lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
+ SelectionDAG &DAG) const;
+ SDValue lowerBlockAddress(BlockAddressSDNode *Node,
+ SelectionDAG &DAG) const;
+ SDValue lowerJumpTable(JumpTableSDNode *JT, SelectionDAG &DAG) const;
+ SDValue lowerConstantPool(ConstantPoolSDNode *CP, SelectionDAG &DAG) const;
+ SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG,
+ unsigned Opcode) const;
+ SDValue lowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
+ unsigned UnpackHigh) const;
+ SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const;
+
+ bool canTreatAsByteVector(EVT VT) const;
+ SDValue combineExtract(const SDLoc &DL, EVT ElemVT, EVT VecVT, SDValue OrigOp,
+ unsigned Index, DAGCombinerInfo &DCI,
+ bool Force) const;
+ SDValue combineTruncateExtract(const SDLoc &DL, EVT TruncVT, SDValue Op,
+ DAGCombinerInfo &DCI) const;
+ SDValue combineSIGN_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineMERGE(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineSTORE(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineEXTRACT_VECTOR_ELT(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineBSWAP(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineSHIFTROT(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ // If the last instruction before MBBI in MBB was some form of COMPARE,
+ // try to replace it with a COMPARE AND BRANCH just before MBBI.
+ // CCMask and Target are the BRC-like operands for the branch.
+ // Return true if the change was made.
+ bool convertPrevCompareToBranch(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned CCMask,
+ MachineBasicBlock *Target) const;
+
+ // Implement EmitInstrWithCustomInserter for individual operation types.
+ MachineBasicBlock *emitSelect(MachineInstr &MI, MachineBasicBlock *BB,
+ unsigned LOCROpcode) const;
+ MachineBasicBlock *emitCondStore(MachineInstr &MI, MachineBasicBlock *BB,
+ unsigned StoreOpcode, unsigned STOCOpcode,
+ bool Invert) const;
+ MachineBasicBlock *emitExt128(MachineInstr &MI, MachineBasicBlock *MBB,
+ bool ClearEven, unsigned SubReg) const;
+ MachineBasicBlock *emitAtomicLoadBinary(MachineInstr &MI,
+ MachineBasicBlock *BB,
+ unsigned BinOpcode, unsigned BitSize,
+ bool Invert = false) const;
+ MachineBasicBlock *emitAtomicLoadMinMax(MachineInstr &MI,
+ MachineBasicBlock *MBB,
+ unsigned CompareOpcode,
+ unsigned KeepOldMask,
+ unsigned BitSize) const;
+ MachineBasicBlock *emitAtomicCmpSwapW(MachineInstr &MI,
+ MachineBasicBlock *BB) const;
+ MachineBasicBlock *emitMemMemWrapper(MachineInstr &MI, MachineBasicBlock *BB,
+ unsigned Opcode) const;
+ MachineBasicBlock *emitStringWrapper(MachineInstr &MI, MachineBasicBlock *BB,
+ unsigned Opcode) const;
+ MachineBasicBlock *emitTransactionBegin(MachineInstr &MI,
+ MachineBasicBlock *MBB,
+ unsigned Opcode, bool NoFloat) const;
+ MachineBasicBlock *emitLoadAndTestCmp0(MachineInstr &MI,
+ MachineBasicBlock *MBB,
+ unsigned Opcode) const;
+
+ const TargetRegisterClass *getRepRegClassFor(MVT VT) const override;
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h b/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h
new file mode 100644
index 000000000000..896b665d25eb
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h
@@ -0,0 +1,46 @@
+//===-- SystemZInstrBuilder.h - Functions to aid building insts -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to handle SystemZ'isms in a clean way.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZINSTRBUILDER_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZINSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+
+namespace llvm {
+
+/// Add a BDX memory reference for frame object FI to MIB.
+static inline const MachineInstrBuilder &
+addFrameReference(const MachineInstrBuilder &MIB, int FI) {
+ MachineInstr *MI = MIB;
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MachineFrameInfo &MFFrame = MF.getFrameInfo();
+ const MCInstrDesc &MCID = MI->getDesc();
+ auto Flags = MachineMemOperand::MONone;
+ if (MCID.mayLoad())
+ Flags |= MachineMemOperand::MOLoad;
+ if (MCID.mayStore())
+ Flags |= MachineMemOperand::MOStore;
+ int64_t Offset = 0;
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI, Offset), Flags,
+ MFFrame.getObjectSize(FI), MFFrame.getObjectAlignment(FI));
+ return MIB.addFrameIndex(FI).addImm(Offset).addReg(0).addMemOperand(MMO);
+}
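+
+// Typical use, as an illustrative sketch only (SrcReg, IsKill and FrameIdx
+// are placeholder names): spilling a 64-bit GPR with STG might look like
+//
+//   addFrameReference(BuildMI(MBB, MBBI, DL, TII->get(SystemZ::STG))
+//                       .addReg(SrcReg, getKillRegState(IsKill)),
+//                     FrameIdx);
+//
+// which appends the frame index, a zero displacement, a zero index register
+// and a MachineMemOperand sized to the frame slot.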
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrDFP.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrDFP.td
new file mode 100644
index 000000000000..08ab2d7bbc52
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrDFP.td
@@ -0,0 +1,231 @@
+//==- SystemZInstrDFP.td - Floating-point SystemZ instructions -*- tblgen-*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The instructions in this file implement SystemZ decimal floating-point
+// arithmetic. These instructions are not currently used for code generation,
+// and are provided for use with the assembler and disassembler only. If LLVM
+// ever supports decimal floating-point types (_Decimal64 etc.), they can
+// also be used for code generation for those types.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load and test.
+let Defs = [CC] in {
+ def LTDTR : UnaryRRE<"ltdtr", 0xB3D6, null_frag, FP64, FP64>;
+ def LTXTR : UnaryRRE<"ltxtr", 0xB3DE, null_frag, FP128, FP128>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Convert floating-point values to narrower representations. The destination
+// of LDXTR is a 128-bit value, but only the first register of the pair is used.
+def LEDTR : TernaryRRFe<"ledtr", 0xB3D5, FP32, FP64>;
+def LDXTR : TernaryRRFe<"ldxtr", 0xB3DD, FP128, FP128>;
+
+// Extend floating-point values to wider representations.
+def LDETR : BinaryRRFd<"ldetr", 0xB3D4, FP64, FP32>;
+def LXDTR : BinaryRRFd<"lxdtr", 0xB3DC, FP128, FP64>;
+
+// Convert a signed integer value to a floating-point one.
+def CDGTR : UnaryRRE<"cdgtr", 0xB3F1, null_frag, FP64, GR64>;
+def CXGTR : UnaryRRE<"cxgtr", 0xB3F9, null_frag, FP128, GR64>;
+let Predicates = [FeatureFPExtension] in {
+ def CDGTRA : TernaryRRFe<"cdgtra", 0xB3F1, FP64, GR64>;
+ def CXGTRA : TernaryRRFe<"cxgtra", 0xB3F9, FP128, GR64>;
+ def CDFTR : TernaryRRFe<"cdftr", 0xB951, FP64, GR32>;
+ def CXFTR : TernaryRRFe<"cxftr", 0xB959, FP128, GR32>;
+}
+
+// Convert an unsigned integer value to a floating-point one.
+let Predicates = [FeatureFPExtension] in {
+ def CDLGTR : TernaryRRFe<"cdlgtr", 0xB952, FP64, GR64>;
+ def CXLGTR : TernaryRRFe<"cxlgtr", 0xB95A, FP128, GR64>;
+ def CDLFTR : TernaryRRFe<"cdlftr", 0xB953, FP64, GR32>;
+ def CXLFTR : TernaryRRFe<"cxlftr", 0xB95B, FP128, GR32>;
+}
+
+// Convert a floating-point value to a signed integer value.
+let Defs = [CC] in {
+ def CGDTR : BinaryRRFe<"cgdtr", 0xB3E1, GR64, FP64>;
+ def CGXTR : BinaryRRFe<"cgxtr", 0xB3E9, GR64, FP128>;
+ let Predicates = [FeatureFPExtension] in {
+ def CGDTRA : TernaryRRFe<"cgdtra", 0xB3E1, GR64, FP64>;
+ def CGXTRA : TernaryRRFe<"cgxtra", 0xB3E9, GR64, FP128>;
+ def CFDTR : TernaryRRFe<"cfdtr", 0xB941, GR32, FP64>;
+ def CFXTR : TernaryRRFe<"cfxtr", 0xB949, GR32, FP128>;
+ }
+}
+
+// Convert a floating-point value to an unsigned integer value.
+let Defs = [CC] in {
+ let Predicates = [FeatureFPExtension] in {
+ def CLGDTR : TernaryRRFe<"clgdtr", 0xB942, GR64, FP64>;
+ def CLGXTR : TernaryRRFe<"clgxtr", 0xB94A, GR64, FP128>;
+ def CLFDTR : TernaryRRFe<"clfdtr", 0xB943, GR32, FP64>;
+ def CLFXTR : TernaryRRFe<"clfxtr", 0xB94B, GR32, FP128>;
+ }
+}
+
+// Convert a packed value to a floating-point one.
+def CDSTR : UnaryRRE<"cdstr", 0xB3F3, null_frag, FP64, GR64>;
+def CXSTR : UnaryRRE<"cxstr", 0xB3FB, null_frag, FP128, GR128>;
+def CDUTR : UnaryRRE<"cdutr", 0xB3F2, null_frag, FP64, GR64>;
+def CXUTR : UnaryRRE<"cxutr", 0xB3FA, null_frag, FP128, GR128>;
+
+// Convert a floating-point value to a packed value.
+def CSDTR : BinaryRRFd<"csdtr", 0xB3E3, GR64, FP64>;
+def CSXTR : BinaryRRFd<"csxtr", 0xB3EB, GR128, FP128>;
+def CUDTR : UnaryRRE<"cudtr", 0xB3E2, null_frag, GR64, FP64>;
+def CUXTR : UnaryRRE<"cuxtr", 0xB3EA, null_frag, GR128, FP128>;
+
+// Convert from/to memory values in the zoned format.
+let Predicates = [FeatureDFPZonedConversion] in {
+ def CDZT : BinaryRSL<"cdzt", 0xEDAA, FP64>;
+ def CXZT : BinaryRSL<"cxzt", 0xEDAB, FP128>;
+ def CZDT : StoreBinaryRSL<"czdt", 0xEDA8, FP64>;
+ def CZXT : StoreBinaryRSL<"czxt", 0xEDA9, FP128>;
+}
+
+// Convert from/to memory values in the packed format.
+let Predicates = [FeatureDFPPackedConversion] in {
+ def CDPT : BinaryRSL<"cdpt", 0xEDAE, FP64>;
+ def CXPT : BinaryRSL<"cxpt", 0xEDAF, FP128>;
+ def CPDT : StoreBinaryRSL<"cpdt", 0xEDAC, FP64>;
+ def CPXT : StoreBinaryRSL<"cpxt", 0xEDAD, FP128>;
+}
+
+// Perform floating-point operation.
+let Defs = [CC, R1L, F0Q], Uses = [R0L, F4Q] in
+ def PFPO : SideEffectInherentE<"pfpo", 0x010A>;
+
+
+//===----------------------------------------------------------------------===//
+// Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Round to an integer, with the second operand (M3) specifying the rounding
+// mode. M4 can be set to 4 to suppress detection of inexact conditions.
+def FIDTR : TernaryRRFe<"fidtr", 0xB3D7, FP64, FP64>;
+def FIXTR : TernaryRRFe<"fixtr", 0xB3DF, FP128, FP128>;
+
+// Extract biased exponent.
+def EEDTR : UnaryRRE<"eedtr", 0xB3E5, null_frag, FP64, FP64>;
+def EEXTR : UnaryRRE<"eextr", 0xB3ED, null_frag, FP128, FP128>;
+
+// Extract significance.
+def ESDTR : UnaryRRE<"esdtr", 0xB3E7, null_frag, FP64, FP64>;
+def ESXTR : UnaryRRE<"esxtr", 0xB3EF, null_frag, FP128, FP128>;
+
+
+//===----------------------------------------------------------------------===//
+// Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition.
+let Defs = [CC] in {
+ let isCommutable = 1 in {
+ def ADTR : BinaryRRFa<"adtr", 0xB3D2, null_frag, FP64, FP64, FP64>;
+ def AXTR : BinaryRRFa<"axtr", 0xB3DA, null_frag, FP128, FP128, FP128>;
+ }
+ let Predicates = [FeatureFPExtension] in {
+ def ADTRA : TernaryRRFa<"adtra", 0xB3D2, FP64, FP64, FP64>;
+ def AXTRA : TernaryRRFa<"axtra", 0xB3DA, FP128, FP128, FP128>;
+ }
+}
+
+// Subtraction.
+let Defs = [CC] in {
+ def SDTR : BinaryRRFa<"sdtr", 0xB3D3, null_frag, FP64, FP64, FP64>;
+ def SXTR : BinaryRRFa<"sxtr", 0xB3DB, null_frag, FP128, FP128, FP128>;
+ let Predicates = [FeatureFPExtension] in {
+ def SDTRA : TernaryRRFa<"sdtra", 0xB3D3, FP64, FP64, FP64>;
+ def SXTRA : TernaryRRFa<"sxtra", 0xB3DB, FP128, FP128, FP128>;
+ }
+}
+
+// Multiplication.
+let isCommutable = 1 in {
+ def MDTR : BinaryRRFa<"mdtr", 0xB3D0, null_frag, FP64, FP64, FP64>;
+ def MXTR : BinaryRRFa<"mxtr", 0xB3D8, null_frag, FP128, FP128, FP128>;
+}
+let Predicates = [FeatureFPExtension] in {
+ def MDTRA : TernaryRRFa<"mdtra", 0xB3D0, FP64, FP64, FP64>;
+ def MXTRA : TernaryRRFa<"mxtra", 0xB3D8, FP128, FP128, FP128>;
+}
+
+// Division.
+def DDTR : BinaryRRFa<"ddtr", 0xB3D1, null_frag, FP64, FP64, FP64>;
+def DXTR : BinaryRRFa<"dxtr", 0xB3D9, null_frag, FP128, FP128, FP128>;
+let Predicates = [FeatureFPExtension] in {
+ def DDTRA : TernaryRRFa<"ddtra", 0xB3D1, FP64, FP64, FP64>;
+ def DXTRA : TernaryRRFa<"dxtra", 0xB3D9, FP128, FP128, FP128>;
+}
+
+// Quantize.
+def QADTR : TernaryRRFb<"qadtr", 0xB3F5, FP64, FP64, FP64>;
+def QAXTR : TernaryRRFb<"qaxtr", 0xB3FD, FP128, FP128, FP128>;
+
+// Reround.
+def RRDTR : TernaryRRFb<"rrdtr", 0xB3F7, FP64, FP64, FP64>;
+def RRXTR : TernaryRRFb<"rrxtr", 0xB3FF, FP128, FP128, FP128>;
+
+// Shift significand left/right.
+def SLDT : BinaryRXF<"sldt", 0xED40, null_frag, FP64, FP64, null_frag, 0>;
+def SLXT : BinaryRXF<"slxt", 0xED48, null_frag, FP128, FP128, null_frag, 0>;
+def SRDT : BinaryRXF<"srdt", 0xED41, null_frag, FP64, FP64, null_frag, 0>;
+def SRXT : BinaryRXF<"srxt", 0xED49, null_frag, FP128, FP128, null_frag, 0>;
+
+// Insert biased exponent.
+def IEDTR : BinaryRRFb<"iedtr", 0xB3F6, null_frag, FP64, FP64, FP64>;
+def IEXTR : BinaryRRFb<"iextr", 0xB3FE, null_frag, FP128, FP128, FP128>;
+
+
+//===----------------------------------------------------------------------===//
+// Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare.
+let Defs = [CC] in {
+ def CDTR : CompareRRE<"cdtr", 0xB3E4, null_frag, FP64, FP64>;
+ def CXTR : CompareRRE<"cxtr", 0xB3EC, null_frag, FP128, FP128>;
+}
+
+// Compare and signal.
+let Defs = [CC] in {
+ def KDTR : CompareRRE<"kdtr", 0xB3E0, null_frag, FP64, FP64>;
+ def KXTR : CompareRRE<"kxtr", 0xB3E8, null_frag, FP128, FP128>;
+}
+
+// Compare biased exponent.
+let Defs = [CC] in {
+ def CEDTR : CompareRRE<"cedtr", 0xB3F4, null_frag, FP64, FP64>;
+ def CEXTR : CompareRRE<"cextr", 0xB3FC, null_frag, FP128, FP128>;
+}
+
+// Test Data Class.
+let Defs = [CC] in {
+ def TDCET : TestRXE<"tdcet", 0xED50, null_frag, FP32>;
+ def TDCDT : TestRXE<"tdcdt", 0xED54, null_frag, FP64>;
+ def TDCXT : TestRXE<"tdcxt", 0xED58, null_frag, FP128>;
+}
+
+// Test Data Group.
+let Defs = [CC] in {
+ def TDGET : TestRXE<"tdget", 0xED51, null_frag, FP32>;
+ def TDGDT : TestRXE<"tdgdt", 0xED55, null_frag, FP64>;
+ def TDGXT : TestRXE<"tdgxt", 0xED59, null_frag, FP128>;
+}
+
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td
new file mode 100644
index 000000000000..10172bd45203
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td
@@ -0,0 +1,521 @@
+//==- SystemZInstrFP.td - Floating-point SystemZ instructions --*- tblgen-*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Select instructions
+//===----------------------------------------------------------------------===//
+
+// C's ?: operator for floating-point operands.
+def SelectF32 : SelectWrapper<FP32>;
+def SelectF64 : SelectWrapper<FP64>;
+def SelectF128 : SelectWrapper<FP128>;
+
+defm CondStoreF32 : CondStores<FP32, nonvolatile_store,
+ nonvolatile_load, bdxaddr20only>;
+defm CondStoreF64 : CondStores<FP64, nonvolatile_store,
+ nonvolatile_load, bdxaddr20only>;
+
+//===----------------------------------------------------------------------===//
+// Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load zero.
+let hasSideEffects = 0, isAsCheapAsAMove = 1, isMoveImm = 1 in {
+ def LZER : InherentRRE<"lzer", 0xB374, FP32, fpimm0>;
+ def LZDR : InherentRRE<"lzdr", 0xB375, FP64, fpimm0>;
+ def LZXR : InherentRRE<"lzxr", 0xB376, FP128, fpimm0>;
+}
+
+// Moves between two floating-point registers.
+let hasSideEffects = 0 in {
+ def LER : UnaryRR <"ler", 0x38, null_frag, FP32, FP32>;
+ def LDR : UnaryRR <"ldr", 0x28, null_frag, FP64, FP64>;
+ def LXR : UnaryRRE<"lxr", 0xB365, null_frag, FP128, FP128>;
+
+ // For z13 we prefer LDR over LER to avoid partial register dependencies.
+ let isCodeGenOnly = 1 in
+ def LDR32 : UnaryRR<"ldr", 0x28, null_frag, FP32, FP32>;
+}
+
+// Moves between two floating-point registers that also set the condition
+// codes.
+let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
+ defm LTEBR : LoadAndTestRRE<"ltebr", 0xB302, FP32>;
+ defm LTDBR : LoadAndTestRRE<"ltdbr", 0xB312, FP64>;
+ defm LTXBR : LoadAndTestRRE<"ltxbr", 0xB342, FP128>;
+}
+// Note that LTxBRCompare is not available if we have vector support,
+// since load-and-test instructions will partially clobber the target
+// (vector) register.
+let Predicates = [FeatureNoVector] in {
+ defm : CompareZeroFP<LTEBRCompare, FP32>;
+ defm : CompareZeroFP<LTDBRCompare, FP64>;
+ defm : CompareZeroFP<LTXBRCompare, FP128>;
+}
+
+// Use a normal load-and-test for compare against zero in case of
+// vector support (via a pseudo to simplify instruction selection).
+let Defs = [CC], usesCustomInserter = 1 in {
+ def LTEBRCompare_VecPseudo : Pseudo<(outs), (ins FP32:$R1, FP32:$R2), []>;
+ def LTDBRCompare_VecPseudo : Pseudo<(outs), (ins FP64:$R1, FP64:$R2), []>;
+ def LTXBRCompare_VecPseudo : Pseudo<(outs), (ins FP128:$R1, FP128:$R2), []>;
+}
+let Predicates = [FeatureVector] in {
+ defm : CompareZeroFP<LTEBRCompare_VecPseudo, FP32>;
+ defm : CompareZeroFP<LTDBRCompare_VecPseudo, FP64>;
+ defm : CompareZeroFP<LTXBRCompare_VecPseudo, FP128>;
+}
+
+// Moves between 64-bit integer and floating-point registers.
+def LGDR : UnaryRRE<"lgdr", 0xB3CD, bitconvert, GR64, FP64>;
+def LDGR : UnaryRRE<"ldgr", 0xB3C1, bitconvert, FP64, GR64>;
+
+// fcopysign with an FP32 result.
+let isCodeGenOnly = 1 in {
+ def CPSDRss : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP32, FP32, FP32>;
+ def CPSDRsd : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP32, FP32, FP64>;
+}
+
+// The sign of an FP128 is in the high register.
+def : Pat<(fcopysign FP32:$src1, FP128:$src2),
+ (CPSDRsd FP32:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
+
+// fcopysign with an FP64 result.
+let isCodeGenOnly = 1 in
+ def CPSDRds : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP64, FP64, FP32>;
+def CPSDRdd : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP64, FP64, FP64>;
+
+// The sign of an FP128 is in the high register.
+def : Pat<(fcopysign FP64:$src1, FP128:$src2),
+ (CPSDRdd FP64:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
+
+// fcopysign with an FP128 result. Use "upper" as the high half and leave
+// the low half as-is.
+class CopySign128<RegisterOperand cls, dag upper>
+ : Pat<(fcopysign FP128:$src1, cls:$src2),
+ (INSERT_SUBREG FP128:$src1, upper, subreg_h64)>;
+
+def : CopySign128<FP32, (CPSDRds (EXTRACT_SUBREG FP128:$src1, subreg_h64),
+ FP32:$src2)>;
+def : CopySign128<FP64, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_h64),
+ FP64:$src2)>;
+def : CopySign128<FP128, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_h64),
+ (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
+
+defm LoadStoreF32 : MVCLoadStore<load, f32, MVCSequence, 4>;
+defm LoadStoreF64 : MVCLoadStore<load, f64, MVCSequence, 8>;
+defm LoadStoreF128 : MVCLoadStore<load, f128, MVCSequence, 16>;
+
+//===----------------------------------------------------------------------===//
+// Load instructions
+//===----------------------------------------------------------------------===//
+
+let canFoldAsLoad = 1, SimpleBDXLoad = 1 in {
+ defm LE : UnaryRXPair<"le", 0x78, 0xED64, load, FP32, 4>;
+ defm LD : UnaryRXPair<"ld", 0x68, 0xED65, load, FP64, 8>;
+
+ // For z13 we prefer LDE over LE to avoid partial register dependencies.
+ let isCodeGenOnly = 1 in
+ def LDE32 : UnaryRXE<"lde", 0xED24, null_frag, FP32, 4>;
+
+ // These instructions are split after register allocation, so we don't
+ // want a custom inserter.
+ let Has20BitOffset = 1, HasIndex = 1, Is128Bit = 1 in {
+ def LX : Pseudo<(outs FP128:$dst), (ins bdxaddr20only128:$src),
+ [(set FP128:$dst, (load bdxaddr20only128:$src))]>;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Store instructions
+//===----------------------------------------------------------------------===//
+
+let SimpleBDXStore = 1 in {
+ defm STE : StoreRXPair<"ste", 0x70, 0xED66, store, FP32, 4>;
+ defm STD : StoreRXPair<"std", 0x60, 0xED67, store, FP64, 8>;
+
+ // These instructions are split after register allocation, so we don't
+ // want a custom inserter.
+ let Has20BitOffset = 1, HasIndex = 1, Is128Bit = 1 in {
+ def STX : Pseudo<(outs), (ins FP128:$src, bdxaddr20only128:$dst),
+ [(store FP128:$src, bdxaddr20only128:$dst)]>;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Convert floating-point values to narrower representations, rounding
+// according to the current mode. The destination of LEXBR and LDXBR
+// is a 128-bit value, but only the first register of the pair is used.
+def LEDBR : UnaryRRE<"ledbr", 0xB344, fpround, FP32, FP64>;
+def LEXBR : UnaryRRE<"lexbr", 0xB346, null_frag, FP128, FP128>;
+def LDXBR : UnaryRRE<"ldxbr", 0xB345, null_frag, FP128, FP128>;
+
+def LEDBRA : TernaryRRFe<"ledbra", 0xB344, FP32, FP64>,
+ Requires<[FeatureFPExtension]>;
+def LEXBRA : TernaryRRFe<"lexbra", 0xB346, FP128, FP128>,
+ Requires<[FeatureFPExtension]>;
+def LDXBRA : TernaryRRFe<"ldxbra", 0xB345, FP128, FP128>,
+ Requires<[FeatureFPExtension]>;
+
+def : Pat<(f32 (fpround FP128:$src)),
+ (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hr32)>;
+def : Pat<(f64 (fpround FP128:$src)),
+ (EXTRACT_SUBREG (LDXBR FP128:$src), subreg_h64)>;
+
+// Extend register floating-point values to wider representations.
+def LDEBR : UnaryRRE<"ldebr", 0xB304, fpextend, FP64, FP32>;
+def LXEBR : UnaryRRE<"lxebr", 0xB306, fpextend, FP128, FP32>;
+def LXDBR : UnaryRRE<"lxdbr", 0xB305, fpextend, FP128, FP64>;
+
+// Extend memory floating-point values to wider representations.
+def LDEB : UnaryRXE<"ldeb", 0xED04, extloadf32, FP64, 4>;
+def LXEB : UnaryRXE<"lxeb", 0xED06, extloadf32, FP128, 4>;
+def LXDB : UnaryRXE<"lxdb", 0xED05, extloadf64, FP128, 8>;
+
+// Convert a signed integer register value to a floating-point one.
+def CEFBR : UnaryRRE<"cefbr", 0xB394, sint_to_fp, FP32, GR32>;
+def CDFBR : UnaryRRE<"cdfbr", 0xB395, sint_to_fp, FP64, GR32>;
+def CXFBR : UnaryRRE<"cxfbr", 0xB396, sint_to_fp, FP128, GR32>;
+
+def CEGBR : UnaryRRE<"cegbr", 0xB3A4, sint_to_fp, FP32, GR64>;
+def CDGBR : UnaryRRE<"cdgbr", 0xB3A5, sint_to_fp, FP64, GR64>;
+def CXGBR : UnaryRRE<"cxgbr", 0xB3A6, sint_to_fp, FP128, GR64>;
+
+// The FP extension feature provides versions of the above that allow
+// specifying rounding mode and inexact-exception suppression flags.
+let Predicates = [FeatureFPExtension] in {
+ def CEFBRA : TernaryRRFe<"cefbra", 0xB394, FP32, GR32>;
+ def CDFBRA : TernaryRRFe<"cdfbra", 0xB395, FP64, GR32>;
+ def CXFBRA : TernaryRRFe<"cxfbra", 0xB396, FP128, GR32>;
+
+ def CEGBRA : TernaryRRFe<"cegbra", 0xB3A4, FP32, GR64>;
+ def CDGBRA : TernaryRRFe<"cdgbra", 0xB3A5, FP64, GR64>;
+ def CXGBRA : TernaryRRFe<"cxgbra", 0xB3A6, FP128, GR64>;
+}
+
+// Convert an unsigned integer register value to a floating-point one.
+let Predicates = [FeatureFPExtension] in {
+ def CELFBR : TernaryRRFe<"celfbr", 0xB390, FP32, GR32>;
+ def CDLFBR : TernaryRRFe<"cdlfbr", 0xB391, FP64, GR32>;
+ def CXLFBR : TernaryRRFe<"cxlfbr", 0xB392, FP128, GR32>;
+
+ def CELGBR : TernaryRRFe<"celgbr", 0xB3A0, FP32, GR64>;
+ def CDLGBR : TernaryRRFe<"cdlgbr", 0xB3A1, FP64, GR64>;
+ def CXLGBR : TernaryRRFe<"cxlgbr", 0xB3A2, FP128, GR64>;
+
+ def : Pat<(f32 (uint_to_fp GR32:$src)), (CELFBR 0, GR32:$src, 0)>;
+ def : Pat<(f64 (uint_to_fp GR32:$src)), (CDLFBR 0, GR32:$src, 0)>;
+ def : Pat<(f128 (uint_to_fp GR32:$src)), (CXLFBR 0, GR32:$src, 0)>;
+
+ def : Pat<(f32 (uint_to_fp GR64:$src)), (CELGBR 0, GR64:$src, 0)>;
+ def : Pat<(f64 (uint_to_fp GR64:$src)), (CDLGBR 0, GR64:$src, 0)>;
+ def : Pat<(f128 (uint_to_fp GR64:$src)), (CXLGBR 0, GR64:$src, 0)>;
+}
+
+// Convert a floating-point register value to a signed integer value,
+// with the second operand (modifier M3) specifying the rounding mode.
+let Defs = [CC] in {
+ def CFEBR : BinaryRRFe<"cfebr", 0xB398, GR32, FP32>;
+ def CFDBR : BinaryRRFe<"cfdbr", 0xB399, GR32, FP64>;
+ def CFXBR : BinaryRRFe<"cfxbr", 0xB39A, GR32, FP128>;
+
+ def CGEBR : BinaryRRFe<"cgebr", 0xB3A8, GR64, FP32>;
+ def CGDBR : BinaryRRFe<"cgdbr", 0xB3A9, GR64, FP64>;
+ def CGXBR : BinaryRRFe<"cgxbr", 0xB3AA, GR64, FP128>;
+}
+
+// fp_to_sint always rounds towards zero, which is modifier value 5.
+def : Pat<(i32 (fp_to_sint FP32:$src)), (CFEBR 5, FP32:$src)>;
+def : Pat<(i32 (fp_to_sint FP64:$src)), (CFDBR 5, FP64:$src)>;
+def : Pat<(i32 (fp_to_sint FP128:$src)), (CFXBR 5, FP128:$src)>;
+
+def : Pat<(i64 (fp_to_sint FP32:$src)), (CGEBR 5, FP32:$src)>;
+def : Pat<(i64 (fp_to_sint FP64:$src)), (CGDBR 5, FP64:$src)>;
+def : Pat<(i64 (fp_to_sint FP128:$src)), (CGXBR 5, FP128:$src)>;
+
+// The FP extension feature provides versions of the above that allow
+// also specifying the inexact-exception suppression flag.
+let Predicates = [FeatureFPExtension], Defs = [CC] in {
+ def CFEBRA : TernaryRRFe<"cfebra", 0xB398, GR32, FP32>;
+ def CFDBRA : TernaryRRFe<"cfdbra", 0xB399, GR32, FP64>;
+ def CFXBRA : TernaryRRFe<"cfxbra", 0xB39A, GR32, FP128>;
+
+ def CGEBRA : TernaryRRFe<"cgebra", 0xB3A8, GR64, FP32>;
+ def CGDBRA : TernaryRRFe<"cgdbra", 0xB3A9, GR64, FP64>;
+ def CGXBRA : TernaryRRFe<"cgxbra", 0xB3AA, GR64, FP128>;
+}
+
+// Convert a floating-point register value to an unsigned integer value.
+let Predicates = [FeatureFPExtension] in {
+ let Defs = [CC] in {
+ def CLFEBR : TernaryRRFe<"clfebr", 0xB39C, GR32, FP32>;
+ def CLFDBR : TernaryRRFe<"clfdbr", 0xB39D, GR32, FP64>;
+ def CLFXBR : TernaryRRFe<"clfxbr", 0xB39E, GR32, FP128>;
+
+ def CLGEBR : TernaryRRFe<"clgebr", 0xB3AC, GR64, FP32>;
+ def CLGDBR : TernaryRRFe<"clgdbr", 0xB3AD, GR64, FP64>;
+ def CLGXBR : TernaryRRFe<"clgxbr", 0xB3AE, GR64, FP128>;
+ }
+
+ def : Pat<(i32 (fp_to_uint FP32:$src)), (CLFEBR 5, FP32:$src, 0)>;
+ def : Pat<(i32 (fp_to_uint FP64:$src)), (CLFDBR 5, FP64:$src, 0)>;
+ def : Pat<(i32 (fp_to_uint FP128:$src)), (CLFXBR 5, FP128:$src, 0)>;
+
+ def : Pat<(i64 (fp_to_uint FP32:$src)), (CLGEBR 5, FP32:$src, 0)>;
+ def : Pat<(i64 (fp_to_uint FP64:$src)), (CLGDBR 5, FP64:$src, 0)>;
+ def : Pat<(i64 (fp_to_uint FP128:$src)), (CLGXBR 5, FP128:$src, 0)>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// We prefer generic instructions during isel, because they do not
+// clobber CC and therefore give the scheduler more freedom. In cases where
+// the CC is actually useful, the SystemZElimCompare pass will try to
+// convert generic instructions into opcodes that also set CC. Note
+// that lcdf / lpdf / lndf only affect the sign bit, and can therefore
+// be used with fp32 as well. This could be done for fp128, in which
+// case the operands would have to be tied.
+
+// Negation (Load Complement).
+let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
+ def LCEBR : UnaryRRE<"lcebr", 0xB303, null_frag, FP32, FP32>;
+ def LCDBR : UnaryRRE<"lcdbr", 0xB313, null_frag, FP64, FP64>;
+ def LCXBR : UnaryRRE<"lcxbr", 0xB343, fneg, FP128, FP128>;
+}
+// Generic form, which does not set CC.
+def LCDFR : UnaryRRE<"lcdfr", 0xB373, fneg, FP64, FP64>;
+let isCodeGenOnly = 1 in
+ def LCDFR_32 : UnaryRRE<"lcdfr", 0xB373, fneg, FP32, FP32>;
+
+// Absolute value (Load Positive).
+let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
+ def LPEBR : UnaryRRE<"lpebr", 0xB300, null_frag, FP32, FP32>;
+ def LPDBR : UnaryRRE<"lpdbr", 0xB310, null_frag, FP64, FP64>;
+ def LPXBR : UnaryRRE<"lpxbr", 0xB340, fabs, FP128, FP128>;
+}
+// Generic form, which does not set CC.
+def LPDFR : UnaryRRE<"lpdfr", 0xB370, fabs, FP64, FP64>;
+let isCodeGenOnly = 1 in
+ def LPDFR_32 : UnaryRRE<"lpdfr", 0xB370, fabs, FP32, FP32>;
+
+// Negative absolute value (Load Negative).
+let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
+ def LNEBR : UnaryRRE<"lnebr", 0xB301, null_frag, FP32, FP32>;
+ def LNDBR : UnaryRRE<"lndbr", 0xB311, null_frag, FP64, FP64>;
+ def LNXBR : UnaryRRE<"lnxbr", 0xB341, fnabs, FP128, FP128>;
+}
+// Generic form, which does not set CC.
+def LNDFR : UnaryRRE<"lndfr", 0xB371, fnabs, FP64, FP64>;
+let isCodeGenOnly = 1 in
+ def LNDFR_32 : UnaryRRE<"lndfr", 0xB371, fnabs, FP32, FP32>;
+
+// Square root.
+def SQEBR : UnaryRRE<"sqebr", 0xB314, fsqrt, FP32, FP32>;
+def SQDBR : UnaryRRE<"sqdbr", 0xB315, fsqrt, FP64, FP64>;
+def SQXBR : UnaryRRE<"sqxbr", 0xB316, fsqrt, FP128, FP128>;
+
+def SQEB : UnaryRXE<"sqeb", 0xED14, loadu<fsqrt>, FP32, 4>;
+def SQDB : UnaryRXE<"sqdb", 0xED15, loadu<fsqrt>, FP64, 8>;
+
+// Round to an integer, with the second operand (modifier M3) specifying
+// the rounding mode. These forms always check for inexact conditions.
+def FIEBR : BinaryRRFe<"fiebr", 0xB357, FP32, FP32>;
+def FIDBR : BinaryRRFe<"fidbr", 0xB35F, FP64, FP64>;
+def FIXBR : BinaryRRFe<"fixbr", 0xB347, FP128, FP128>;
+
+// frint rounds according to the current mode (modifier 0) and detects
+// inexact conditions.
+def : Pat<(frint FP32:$src), (FIEBR 0, FP32:$src)>;
+def : Pat<(frint FP64:$src), (FIDBR 0, FP64:$src)>;
+def : Pat<(frint FP128:$src), (FIXBR 0, FP128:$src)>;
+
+let Predicates = [FeatureFPExtension] in {
+ // Extended forms of the FIxBR instructions. M4 can be set to 4
+ // to suppress detection of inexact conditions.
+ def FIEBRA : TernaryRRFe<"fiebra", 0xB357, FP32, FP32>;
+ def FIDBRA : TernaryRRFe<"fidbra", 0xB35F, FP64, FP64>;
+ def FIXBRA : TernaryRRFe<"fixbra", 0xB347, FP128, FP128>;
+
+ // fnearbyint is like frint but does not detect inexact conditions.
+ def : Pat<(fnearbyint FP32:$src), (FIEBRA 0, FP32:$src, 4)>;
+ def : Pat<(fnearbyint FP64:$src), (FIDBRA 0, FP64:$src, 4)>;
+ def : Pat<(fnearbyint FP128:$src), (FIXBRA 0, FP128:$src, 4)>;
+
+ // floor is no longer allowed to raise an inexact condition,
+ // so restrict it to the cases where the condition can be suppressed.
+ // Mode 7 is round towards -inf.
+ def : Pat<(ffloor FP32:$src), (FIEBRA 7, FP32:$src, 4)>;
+ def : Pat<(ffloor FP64:$src), (FIDBRA 7, FP64:$src, 4)>;
+ def : Pat<(ffloor FP128:$src), (FIXBRA 7, FP128:$src, 4)>;
+
+ // Same idea for ceil, where mode 6 is round towards +inf.
+ def : Pat<(fceil FP32:$src), (FIEBRA 6, FP32:$src, 4)>;
+ def : Pat<(fceil FP64:$src), (FIDBRA 6, FP64:$src, 4)>;
+ def : Pat<(fceil FP128:$src), (FIXBRA 6, FP128:$src, 4)>;
+
+ // Same idea for trunc, where mode 5 is round towards zero.
+ def : Pat<(ftrunc FP32:$src), (FIEBRA 5, FP32:$src, 4)>;
+ def : Pat<(ftrunc FP64:$src), (FIDBRA 5, FP64:$src, 4)>;
+ def : Pat<(ftrunc FP128:$src), (FIXBRA 5, FP128:$src, 4)>;
+
+ // Same idea for round, where mode 1 is round towards nearest with
+ // ties away from zero.
+ def : Pat<(fround FP32:$src), (FIEBRA 1, FP32:$src, 4)>;
+ def : Pat<(fround FP64:$src), (FIDBRA 1, FP64:$src, 4)>;
+ def : Pat<(fround FP128:$src), (FIXBRA 1, FP128:$src, 4)>;
+}
+
+//===----------------------------------------------------------------------===//
+// Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition.
+let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
+ let isCommutable = 1 in {
+ def AEBR : BinaryRRE<"aebr", 0xB30A, fadd, FP32, FP32>;
+ def ADBR : BinaryRRE<"adbr", 0xB31A, fadd, FP64, FP64>;
+ def AXBR : BinaryRRE<"axbr", 0xB34A, fadd, FP128, FP128>;
+ }
+ def AEB : BinaryRXE<"aeb", 0xED0A, fadd, FP32, load, 4>;
+ def ADB : BinaryRXE<"adb", 0xED1A, fadd, FP64, load, 8>;
+}
+
+// Subtraction.
+let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
+ def SEBR : BinaryRRE<"sebr", 0xB30B, fsub, FP32, FP32>;
+ def SDBR : BinaryRRE<"sdbr", 0xB31B, fsub, FP64, FP64>;
+ def SXBR : BinaryRRE<"sxbr", 0xB34B, fsub, FP128, FP128>;
+
+ def SEB : BinaryRXE<"seb", 0xED0B, fsub, FP32, load, 4>;
+ def SDB : BinaryRXE<"sdb", 0xED1B, fsub, FP64, load, 8>;
+}
+
+// Multiplication.
+let isCommutable = 1 in {
+ def MEEBR : BinaryRRE<"meebr", 0xB317, fmul, FP32, FP32>;
+ def MDBR : BinaryRRE<"mdbr", 0xB31C, fmul, FP64, FP64>;
+ def MXBR : BinaryRRE<"mxbr", 0xB34C, fmul, FP128, FP128>;
+}
+def MEEB : BinaryRXE<"meeb", 0xED17, fmul, FP32, load, 4>;
+def MDB : BinaryRXE<"mdb", 0xED1C, fmul, FP64, load, 8>;
+
+// f64 multiplication of two FP32 registers.
+def MDEBR : BinaryRRE<"mdebr", 0xB30C, null_frag, FP64, FP32>;
+def : Pat<(fmul (f64 (fpextend FP32:$src1)), (f64 (fpextend FP32:$src2))),
+ (MDEBR (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+ FP32:$src1, subreg_r32), FP32:$src2)>;
+
+// f64 multiplication of an FP32 register and an f32 memory operand.
+def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>;
+def : Pat<(fmul (f64 (fpextend FP32:$src1)),
+ (f64 (extloadf32 bdxaddr12only:$addr))),
+ (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_r32),
+ bdxaddr12only:$addr)>;
+
+// f128 multiplication of two FP64 registers.
+def MXDBR : BinaryRRE<"mxdbr", 0xB307, null_frag, FP128, FP64>;
+def : Pat<(fmul (f128 (fpextend FP64:$src1)), (f128 (fpextend FP64:$src2))),
+ (MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)),
+ FP64:$src1, subreg_h64), FP64:$src2)>;
+
+// f128 multiplication of an FP64 register and an f64 memory operand.
+def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load, 8>;
+def : Pat<(fmul (f128 (fpextend FP64:$src1)),
+ (f128 (extloadf64 bdxaddr12only:$addr))),
+ (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64),
+ bdxaddr12only:$addr)>;
+
+// Fused multiply-add.
+def MAEBR : TernaryRRD<"maebr", 0xB30E, z_fma, FP32, FP32>;
+def MADBR : TernaryRRD<"madbr", 0xB31E, z_fma, FP64, FP64>;
+
+def MAEB : TernaryRXF<"maeb", 0xED0E, z_fma, FP32, FP32, load, 4>;
+def MADB : TernaryRXF<"madb", 0xED1E, z_fma, FP64, FP64, load, 8>;
+
+// Fused multiply-subtract.
+def MSEBR : TernaryRRD<"msebr", 0xB30F, z_fms, FP32, FP32>;
+def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_fms, FP64, FP64>;
+
+def MSEB : TernaryRXF<"mseb", 0xED0F, z_fms, FP32, FP32, load, 4>;
+def MSDB : TernaryRXF<"msdb", 0xED1F, z_fms, FP64, FP64, load, 8>;
+
+// Division.
+def DEBR : BinaryRRE<"debr", 0xB30D, fdiv, FP32, FP32>;
+def DDBR : BinaryRRE<"ddbr", 0xB31D, fdiv, FP64, FP64>;
+def DXBR : BinaryRRE<"dxbr", 0xB34D, fdiv, FP128, FP128>;
+
+def DEB : BinaryRXE<"deb", 0xED0D, fdiv, FP32, load, 4>;
+def DDB : BinaryRXE<"ddb", 0xED1D, fdiv, FP64, load, 8>;
+
+// Divide to integer.
+let Defs = [CC] in {
+ def DIEBR : TernaryRRFb<"diebr", 0xB353, FP32, FP32, FP32>;
+ def DIDBR : TernaryRRFb<"didbr", 0xB35B, FP64, FP64, FP64>;
+}
+
+//===----------------------------------------------------------------------===//
+// Comparisons
+//===----------------------------------------------------------------------===//
+
+let Defs = [CC], CCValues = 0xF in {
+ def CEBR : CompareRRE<"cebr", 0xB309, z_fcmp, FP32, FP32>;
+ def CDBR : CompareRRE<"cdbr", 0xB319, z_fcmp, FP64, FP64>;
+ def CXBR : CompareRRE<"cxbr", 0xB349, z_fcmp, FP128, FP128>;
+
+ def CEB : CompareRXE<"ceb", 0xED09, z_fcmp, FP32, load, 4>;
+ def CDB : CompareRXE<"cdb", 0xED19, z_fcmp, FP64, load, 8>;
+
+ def KEBR : CompareRRE<"kebr", 0xB308, null_frag, FP32, FP32>;
+ def KDBR : CompareRRE<"kdbr", 0xB318, null_frag, FP64, FP64>;
+ def KXBR : CompareRRE<"kxbr", 0xB348, null_frag, FP128, FP128>;
+
+ def KEB : CompareRXE<"keb", 0xED08, null_frag, FP32, load, 4>;
+ def KDB : CompareRXE<"kdb", 0xED18, null_frag, FP64, load, 8>;
+}
+
+// Test Data Class.
+let Defs = [CC], CCValues = 0xC in {
+ def TCEB : TestRXE<"tceb", 0xED10, z_tdc, FP32>;
+ def TCDB : TestRXE<"tcdb", 0xED11, z_tdc, FP64>;
+ def TCXB : TestRXE<"tcxb", 0xED12, z_tdc, FP128>;
+}
+
+//===----------------------------------------------------------------------===//
+// Floating-point control register instructions
+//===----------------------------------------------------------------------===//
+
+let hasSideEffects = 1 in {
+ def EFPC : InherentRRE<"efpc", 0xB38C, GR32, int_s390_efpc>;
+ def STFPC : StoreInherentS<"stfpc", 0xB29C, storei<int_s390_efpc>, 4>;
+
+ def SFPC : SideEffectUnaryRRE<"sfpc", 0xB384, GR32, int_s390_sfpc>;
+ def LFPC : SideEffectUnaryS<"lfpc", 0xB29D, loadu<int_s390_sfpc>, 4>;
+
+ def SFASR : SideEffectUnaryRRE<"sfasr", 0xB385, GR32, null_frag>;
+ def LFAS : SideEffectUnaryS<"lfas", 0xB2BD, null_frag, 4>;
+
+ def SRNMB : SideEffectAddressS<"srnmb", 0xB2B8, null_frag, shift12only>,
+ Requires<[FeatureFPExtension]>;
+ def SRNM : SideEffectAddressS<"srnm", 0xB299, null_frag, shift12only>;
+ def SRNMT : SideEffectAddressS<"srnmt", 0xB2B9, null_frag, shift12only>;
+}
+
+//===----------------------------------------------------------------------===//
+// Peepholes
+//===----------------------------------------------------------------------===//
+
+def : Pat<(f32 fpimmneg0), (LCDFR_32 (LZER))>;
+def : Pat<(f64 fpimmneg0), (LCDFR (LZDR))>;
+def : Pat<(f128 fpimmneg0), (LCXBR (LZXR))>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
new file mode 100644
index 000000000000..7620e06ccbc9
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -0,0 +1,4545 @@
+//==- SystemZInstrFormats.td - SystemZ Instruction Formats --*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Basic SystemZ instruction definition
+//===----------------------------------------------------------------------===//
+
+class InstSystemZ<int size, dag outs, dag ins, string asmstr,
+ list<dag> pattern> : Instruction {
+ let Namespace = "SystemZ";
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let Size = size;
+ let Pattern = pattern;
+ let AsmString = asmstr;
+
+ // Some instructions come in pairs, one having a 12-bit displacement
+ // and the other having a 20-bit displacement. Both instructions in
+ // the pair have the same DispKey and their DispSizes are "12" and "20"
+ // respectively.
+ string DispKey = "";
+ string DispSize = "none";
+
+ // Many register-based <INSN>R instructions have a memory-based <INSN>
+ // counterpart. OpKey uniquely identifies <INSN>R, while OpType is
+ // "reg" for <INSN>R and "mem" for <INSN>.
+ string OpKey = "";
+ string OpType = "none";
+
+ // Many distinct-operands instructions have older 2-operand equivalents.
+ // NumOpsKey uniquely identifies one of these 2-operand and 3-operand pairs,
+ // with NumOpsValue being "2" or "3" as appropriate.
+ string NumOpsKey = "";
+ string NumOpsValue = "none";
+
+ // True if this instruction is a simple D(X,B) load of a register
+ // (with no sign or zero extension).
+ bit SimpleBDXLoad = 0;
+
+ // True if this instruction is a simple D(X,B) store of a register
+ // (with no truncation).
+ bit SimpleBDXStore = 0;
+
+ // True if this instruction has a 20-bit displacement field.
+ bit Has20BitOffset = 0;
+
+ // True if addresses in this instruction have an index register.
+ bit HasIndex = 0;
+
+ // True if this is a 128-bit pseudo instruction that combines two 64-bit
+ // operations.
+ bit Is128Bit = 0;
+
+ // The access size of all memory operands in bytes, or 0 if not known.
+ bits<5> AccessBytes = 0;
+
+ // If the instruction sets CC to a useful value, this gives the mask
+ // of all possible CC results. The mask has the same form as
+ // SystemZ::CCMASK_*.
+ bits<4> CCValues = 0;
+
+ // The subset of CCValues that have the same meaning as they would after
+ // a comparison of the first operand against zero.
+ bits<4> CompareZeroCCMask = 0;
+
+ // True if the instruction is conditional and if the CC mask operand
+ // comes first (as for BRC, etc.).
+ bit CCMaskFirst = 0;
+
+ // Similar, but true if the CC mask operand comes last (as for LOC, etc.).
+ bit CCMaskLast = 0;
+
+ // True if the instruction is the "logical" rather than "arithmetic" form,
+ // in cases where a distinction exists.
+ bit IsLogical = 0;
+
+ let TSFlags{0} = SimpleBDXLoad;
+ let TSFlags{1} = SimpleBDXStore;
+ let TSFlags{2} = Has20BitOffset;
+ let TSFlags{3} = HasIndex;
+ let TSFlags{4} = Is128Bit;
+ let TSFlags{9-5} = AccessBytes;
+ let TSFlags{13-10} = CCValues;
+ let TSFlags{17-14} = CompareZeroCCMask;
+ let TSFlags{18} = CCMaskFirst;
+ let TSFlags{19} = CCMaskLast;
+ let TSFlags{20} = IsLogical;
+}
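+
+// As an illustration: a simple 4-byte register load in a D(X,B) format
+// would typically set SimpleBDXLoad = 1 and AccessBytes = 4, letting the
+// C++ target code recover both properties from TSFlags{0} and TSFlags{9-5}.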
+
+//===----------------------------------------------------------------------===//
+// Mappings between instructions
+//===----------------------------------------------------------------------===//
+
+// Return the version of an instruction that has an unsigned 12-bit
+// displacement.
+def getDisp12Opcode : InstrMapping {
+ let FilterClass = "InstSystemZ";
+ let RowFields = ["DispKey"];
+ let ColFields = ["DispSize"];
+ let KeyCol = ["20"];
+ let ValueCols = [["12"]];
+}
+
+// Return the version of an instruction that has a signed 20-bit displacement.
+def getDisp20Opcode : InstrMapping {
+ let FilterClass = "InstSystemZ";
+ let RowFields = ["DispKey"];
+ let ColFields = ["DispSize"];
+ let KeyCol = ["12"];
+ let ValueCols = [["20"]];
+}
+
+// Return the memory form of a register instruction.
+def getMemOpcode : InstrMapping {
+ let FilterClass = "InstSystemZ";
+ let RowFields = ["OpKey"];
+ let ColFields = ["OpType"];
+ let KeyCol = ["reg"];
+ let ValueCols = [["mem"]];
+}
+
+// Return the 3-operand form of a 2-operand instruction.
+def getThreeOperandOpcode : InstrMapping {
+ let FilterClass = "InstSystemZ";
+ let RowFields = ["NumOpsKey"];
+ let ColFields = ["NumOpsValue"];
+ let KeyCol = ["2"];
+ let ValueCols = [["3"]];
+}
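+
+// As an illustration of how these mappings are driven (hypothetical
+// records, not actual instructions): two defs that share a DispKey but
+// differ in DispSize form one row of the mapping table, and the mappings
+// above select between the columns:
+//
+//   let DispKey = "foo" in {
+//     let DispSize = "12" in def FOO  : ...;  // 12-bit displacement form
+//     let DispSize = "20" in def FOOY : ...;  // 20-bit displacement form
+//   }
+//
+// The generated tables then map FOOY back to FOO via getDisp12Opcode and
+// FOO to FOOY via getDisp20Opcode.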
+
+//===----------------------------------------------------------------------===//
+// Instruction formats
+//===----------------------------------------------------------------------===//
+//
+// Formats are specified using operand field declarations of the form:
+//
+// bits<4> Rn : register input or output for operand n
+// bits<5> Vn : vector register input or output for operand n
+// bits<m> In : immediate value of width m for operand n
+// bits<m> BDn : address operand n, which has a base and a displacement
+// bits<m> XBDn : address operand n, which has an index, a base and a
+// displacement
+// bits<m> VBDn : address operand n, which has a vector index, a base and a
+// displacement
+// bits<4> Xn : index register for address operand n
+// bits<4> Mn : mode value for operand n
+//
+// The operand numbers ("n" in the list above) follow the architecture manual.
+// Assembly operands sometimes have a different order; in particular, R3 is
+// often written between operands 1 and 2.
+//
+//===----------------------------------------------------------------------===//
+
+class InstE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<2, outs, ins, asmstr, pattern> {
+ field bits<16> Inst;
+ field bits<16> SoftFail = 0;
+
+ let Inst = op;
+}
+
+class InstI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<2, outs, ins, asmstr, pattern> {
+ field bits<16> Inst;
+ field bits<16> SoftFail = 0;
+
+ bits<8> I1;
+
+ let Inst{15-8} = op;
+ let Inst{7-0} = I1;
+}
+
+class InstIE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+ field bits<32> SoftFail = 0;
+
+ bits<4> I1;
+ bits<4> I2;
+
+ let Inst{31-16} = op;
+ let Inst{15-8} = 0;
+ let Inst{7-4} = I1;
+ let Inst{3-0} = I2;
+}
+
+class InstMII<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> M1;
+ bits<12> RI2;
+ bits<24> RI3;
+
+ let Inst{47-40} = op;
+ let Inst{39-36} = M1;
+ let Inst{35-24} = RI2;
+ let Inst{23-0} = RI3;
+}
+
+class InstRIa<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+ field bits<32> SoftFail = 0;
+
+ bits<4> R1;
+ bits<16> I2;
+
+ let Inst{31-24} = op{11-4};
+ let Inst{23-20} = R1;
+ let Inst{19-16} = op{3-0};
+ let Inst{15-0} = I2;
+}
+
+class InstRIb<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+ field bits<32> SoftFail = 0;
+
+ bits<4> R1;
+ bits<16> RI2;
+
+ let Inst{31-24} = op{11-4};
+ let Inst{23-20} = R1;
+ let Inst{19-16} = op{3-0};
+ let Inst{15-0} = RI2;
+}
+
+class InstRIc<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+ field bits<32> SoftFail = 0;
+
+ bits<4> M1;
+ bits<16> RI2;
+
+ let Inst{31-24} = op{11-4};
+ let Inst{23-20} = M1;
+ let Inst{19-16} = op{3-0};
+ let Inst{15-0} = RI2;
+}
+
+class InstRIEa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> R1;
+ bits<16> I2;
+ bits<4> M3;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = R1;
+ let Inst{35-32} = 0;
+ let Inst{31-16} = I2;
+ let Inst{15-12} = M3;
+ let Inst{11-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstRIEb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> R1;
+ bits<4> R2;
+ bits<4> M3;
+ bits<16> RI4;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = R1;
+ let Inst{35-32} = R2;
+ let Inst{31-16} = RI4;
+ let Inst{15-12} = M3;
+ let Inst{11-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstRIEc<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> R1;
+ bits<8> I2;
+ bits<4> M3;
+ bits<16> RI4;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = R1;
+ let Inst{35-32} = M3;
+ let Inst{31-16} = RI4;
+ let Inst{15-8} = I2;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstRIEd<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> R1;
+ bits<4> R3;
+ bits<16> I2;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = R1;
+ let Inst{35-32} = R3;
+ let Inst{31-16} = I2;
+ let Inst{15-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstRIEe<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> R1;
+ bits<4> R3;
+ bits<16> RI2;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = R1;
+ let Inst{35-32} = R3;
+ let Inst{31-16} = RI2;
+ let Inst{15-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstRIEf<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> R1;
+ bits<4> R2;
+ bits<8> I3;
+ bits<8> I4;
+ bits<8> I5;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = R1;
+ let Inst{35-32} = R2;
+ let Inst{31-24} = I3;
+ let Inst{23-16} = I4;
+ let Inst{15-8} = I5;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstRIEg<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> R1;
+ bits<4> M3;
+ bits<16> I2;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = R1;
+ let Inst{35-32} = M3;
+ let Inst{31-16} = I2;
+ let Inst{15-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstRILa<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> R1;
+ bits<32> I2;
+
+ let Inst{47-40} = op{11-4};
+ let Inst{39-36} = R1;
+ let Inst{35-32} = op{3-0};
+ let Inst{31-0} = I2;
+}
+
+class InstRILb<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> R1;
+ bits<32> RI2;
+
+ let Inst{47-40} = op{11-4};
+ let Inst{39-36} = R1;
+ let Inst{35-32} = op{3-0};
+ let Inst{31-0} = RI2;
+}
+
+class InstRILc<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> M1;
+ bits<32> RI2;
+
+ let Inst{47-40} = op{11-4};
+ let Inst{39-36} = M1;
+ let Inst{35-32} = op{3-0};
+ let Inst{31-0} = RI2;
+}
+
+class InstRIS<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> R1;
+ bits<8> I2;
+ bits<4> M3;
+ bits<16> BD4;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = R1;
+ let Inst{35-32} = M3;
+ let Inst{31-16} = BD4;
+ let Inst{15-8} = I2;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstRR<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<2, outs, ins, asmstr, pattern> {
+ field bits<16> Inst;
+ field bits<16> SoftFail = 0;
+
+ bits<4> R1;
+ bits<4> R2;
+
+ let Inst{15-8} = op;
+ let Inst{7-4} = R1;
+ let Inst{3-0} = R2;
+}
+
+class InstRRD<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+ field bits<32> SoftFail = 0;
+
+ bits<4> R1;
+ bits<4> R3;
+ bits<4> R2;
+
+ let Inst{31-16} = op;
+ let Inst{15-12} = R1;
+ let Inst{11-8} = 0;
+ let Inst{7-4} = R3;
+ let Inst{3-0} = R2;
+}
+
+class InstRRE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+ field bits<32> SoftFail = 0;
+
+ bits<4> R1;
+ bits<4> R2;
+
+ let Inst{31-16} = op;
+ let Inst{15-8} = 0;
+ let Inst{7-4} = R1;
+ let Inst{3-0} = R2;
+}
+
+class InstRRFa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+ field bits<32> SoftFail = 0;
+
+ bits<4> R1;
+ bits<4> R2;
+ bits<4> R3;
+ bits<4> M4;
+
+ let Inst{31-16} = op;
+ let Inst{15-12} = R3;
+ let Inst{11-8} = M4;
+ let Inst{7-4} = R1;
+ let Inst{3-0} = R2;
+}
+
+class InstRRFb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+ field bits<32> SoftFail = 0;
+
+ bits<4> R1;
+ bits<4> R2;
+ bits<4> R3;
+ bits<4> M4;
+
+ let Inst{31-16} = op;
+ let Inst{15-12} = R3;
+ let Inst{11-8} = M4;
+ let Inst{7-4} = R1;
+ let Inst{3-0} = R2;
+}
+
+class InstRRFc<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+ field bits<32> SoftFail = 0;
+
+ bits<4> R1;
+ bits<4> R2;
+ bits<4> M3;
+
+ let Inst{31-16} = op;
+ let Inst{15-12} = M3;
+ let Inst{11-8} = 0;
+ let Inst{7-4} = R1;
+ let Inst{3-0} = R2;
+}
+
+class InstRRFd<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+ field bits<32> SoftFail = 0;
+
+ bits<4> R1;
+ bits<4> R2;
+ bits<4> M4;
+
+ let Inst{31-16} = op;
+ let Inst{15-12} = 0;
+ let Inst{11-8} = M4;
+ let Inst{7-4} = R1;
+ let Inst{3-0} = R2;
+}
+
+class InstRRFe<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+ field bits<32> SoftFail = 0;
+
+ bits<4> R1;
+ bits<4> R2;
+ bits<4> M3;
+ bits<4> M4;
+
+ let Inst{31-16} = op;
+ let Inst{15-12} = M3;
+ let Inst{11-8} = M4;
+ let Inst{7-4} = R1;
+ let Inst{3-0} = R2;
+}
+
+class InstRRS<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> R1;
+ bits<4> R2;
+ bits<4> M3;
+ bits<16> BD4;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = R1;
+ let Inst{35-32} = R2;
+ let Inst{31-16} = BD4;
+ let Inst{15-12} = M3;
+ let Inst{11-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstRXa<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+ field bits<32> SoftFail = 0;
+
+ bits<4> R1;
+ bits<20> XBD2;
+
+ let Inst{31-24} = op;
+ let Inst{23-20} = R1;
+ let Inst{19-0} = XBD2;
+
+ let HasIndex = 1;
+}
+
+class InstRXb<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+ field bits<32> SoftFail = 0;
+
+ bits<4> M1;
+ bits<20> XBD2;
+
+ let Inst{31-24} = op;
+ let Inst{23-20} = M1;
+ let Inst{19-0} = XBD2;
+
+ let HasIndex = 1;
+}
+
+class InstRXE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> R1;
+ bits<20> XBD2;
+ bits<4> M3;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = R1;
+ let Inst{35-16} = XBD2;
+ let Inst{15-12} = M3;
+ let Inst{11-8} = 0;
+ let Inst{7-0} = op{7-0};
+
+ let HasIndex = 1;
+}
+
+class InstRXF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> R1;
+ bits<4> R3;
+ bits<20> XBD2;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = R3;
+ let Inst{35-16} = XBD2;
+ let Inst{15-12} = R1;
+ let Inst{11-8} = 0;
+ let Inst{7-0} = op{7-0};
+
+ let HasIndex = 1;
+}
+
+class InstRXYa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> R1;
+ bits<28> XBD2;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = R1;
+ let Inst{35-8} = XBD2;
+ let Inst{7-0} = op{7-0};
+
+ let Has20BitOffset = 1;
+ let HasIndex = 1;
+}
+
+class InstRXYb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> M1;
+ bits<28> XBD2;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = M1;
+ let Inst{35-8} = XBD2;
+ let Inst{7-0} = op{7-0};
+
+ let Has20BitOffset = 1;
+ let HasIndex = 1;
+}
+
+class InstRSa<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+ field bits<32> SoftFail = 0;
+
+ bits<4> R1;
+ bits<4> R3;
+ bits<16> BD2;
+
+ let Inst{31-24} = op;
+ let Inst{23-20} = R1;
+ let Inst{19-16} = R3;
+ let Inst{15-0} = BD2;
+}
+
+class InstRSb<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+ field bits<32> SoftFail = 0;
+
+ bits<4> R1;
+ bits<4> M3;
+ bits<16> BD2;
+
+ let Inst{31-24} = op;
+ let Inst{23-20} = R1;
+ let Inst{19-16} = M3;
+ let Inst{15-0} = BD2;
+}
+
+class InstRSI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+ field bits<32> SoftFail = 0;
+
+ bits<4> R1;
+ bits<4> R3;
+ bits<16> RI2;
+
+ let Inst{31-24} = op;
+ let Inst{23-20} = R1;
+ let Inst{19-16} = R3;
+ let Inst{15-0} = RI2;
+}
+
+class InstRSLa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<20> BDL1;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = BDL1{19-16};
+ let Inst{35-32} = 0;
+ let Inst{31-16} = BDL1{15-0};
+ let Inst{15-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstRSLb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> R1;
+ bits<24> BDL2;
+ bits<4> M3;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-16} = BDL2;
+ let Inst{15-12} = R1;
+ let Inst{11-8} = M3;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstRSYa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> R1;
+ bits<4> R3;
+ bits<24> BD2;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = R1;
+ let Inst{35-32} = R3;
+ let Inst{31-8} = BD2;
+ let Inst{7-0} = op{7-0};
+
+ let Has20BitOffset = 1;
+}
+
+class InstRSYb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> R1;
+ bits<4> M3;
+ bits<24> BD2;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = R1;
+ let Inst{35-32} = M3;
+ let Inst{31-8} = BD2;
+ let Inst{7-0} = op{7-0};
+
+ let Has20BitOffset = 1;
+}
+
+class InstSI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+ field bits<32> SoftFail = 0;
+
+ bits<16> BD1;
+ bits<8> I2;
+
+ let Inst{31-24} = op;
+ let Inst{23-16} = I2;
+ let Inst{15-0} = BD1;
+}
+
+class InstSIL<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<16> BD1;
+ bits<16> I2;
+
+ let Inst{47-32} = op;
+ let Inst{31-16} = BD1;
+ let Inst{15-0} = I2;
+}
+
+class InstSIY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<24> BD1;
+ bits<8> I2;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-32} = I2;
+ let Inst{31-8} = BD1;
+ let Inst{7-0} = op{7-0};
+
+ let Has20BitOffset = 1;
+}
+
+class InstSMI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> M1;
+ bits<16> RI2;
+ bits<16> BD3;
+
+ let Inst{47-40} = op;
+ let Inst{39-36} = M1;
+ let Inst{35-32} = 0;
+ let Inst{31-16} = BD3;
+ let Inst{15-0} = RI2;
+}
+
+class InstSSa<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<24> BDL1;
+ bits<16> BD2;
+
+ let Inst{47-40} = op;
+ let Inst{39-16} = BDL1;
+ let Inst{15-0} = BD2;
+}
+
+class InstSSb<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<20> BDL1;
+ bits<20> BDL2;
+
+ let Inst{47-40} = op;
+ let Inst{39-36} = BDL1{19-16};
+ let Inst{35-32} = BDL2{19-16};
+ let Inst{31-16} = BDL1{15-0};
+ let Inst{15-0} = BDL2{15-0};
+}
+
+class InstSSc<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<20> BDL1;
+ bits<16> BD2;
+ bits<4> I3;
+
+ let Inst{47-40} = op;
+ let Inst{39-36} = BDL1{19-16};
+ let Inst{35-32} = I3;
+ let Inst{31-16} = BDL1{15-0};
+ let Inst{15-0} = BD2;
+}
+
+class InstSSd<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<20> RBD1;
+ bits<16> BD2;
+ bits<4> R3;
+
+ let Inst{47-40} = op;
+ let Inst{39-36} = RBD1{19-16};
+ let Inst{35-32} = R3;
+ let Inst{31-16} = RBD1{15-0};
+ let Inst{15-0} = BD2;
+}
+
+class InstSSe<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> R1;
+ bits<16> BD2;
+ bits<4> R3;
+ bits<16> BD4;
+
+ let Inst{47-40} = op;
+ let Inst{39-36} = R1;
+ let Inst{35-32} = R3;
+ let Inst{31-16} = BD2;
+ let Inst{15-0} = BD4;
+}
+
+class InstSSf<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<16> BD1;
+ bits<24> BDL2;
+
+ let Inst{47-40} = op;
+ let Inst{39-32} = BDL2{23-16};
+ let Inst{31-16} = BD1;
+ let Inst{15-0} = BDL2{15-0};
+}
+
+class InstSSE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<16> BD1;
+ bits<16> BD2;
+
+ let Inst{47-32} = op;
+ let Inst{31-16} = BD1;
+ let Inst{15-0} = BD2;
+}
+
+class InstSSF<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<16> BD1;
+ bits<16> BD2;
+ bits<4> R3;
+
+ let Inst{47-40} = op{11-4};
+ let Inst{39-36} = R3;
+ let Inst{35-32} = op{3-0};
+ let Inst{31-16} = BD1;
+ let Inst{15-0} = BD2;
+}
+
+class InstS<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+ field bits<32> SoftFail = 0;
+
+ bits<16> BD2;
+
+ let Inst{31-16} = op;
+ let Inst{15-0} = BD2;
+}
+
+class InstVRIa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<16> I2;
+ bits<4> M3;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = 0;
+ let Inst{31-16} = I2;
+ let Inst{15-12} = M3;
+ let Inst{11} = V1{4};
+ let Inst{10-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRIb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<8> I2;
+ bits<8> I3;
+ bits<4> M4;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = 0;
+ let Inst{31-24} = I2;
+ let Inst{23-16} = I3;
+ let Inst{15-12} = M4;
+ let Inst{11} = V1{4};
+ let Inst{10-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRIc<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<5> V3;
+ bits<16> I2;
+ bits<4> M4;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = V3{3-0};
+ let Inst{31-16} = I2;
+ let Inst{15-12} = M4;
+ let Inst{11} = V1{4};
+ let Inst{10} = V3{4};
+ let Inst{9-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRId<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<5> V2;
+ bits<5> V3;
+ bits<8> I4;
+ bits<4> M5;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = V2{3-0};
+ let Inst{31-28} = V3{3-0};
+ let Inst{27-24} = 0;
+ let Inst{23-16} = I4;
+ let Inst{15-12} = M5;
+ let Inst{11} = V1{4};
+ let Inst{10} = V2{4};
+ let Inst{9} = V3{4};
+ let Inst{8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRIe<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<5> V2;
+ bits<12> I3;
+ bits<4> M4;
+ bits<4> M5;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = V2{3-0};
+ let Inst{31-20} = I3;
+ let Inst{19-16} = M5;
+ let Inst{15-12} = M4;
+ let Inst{11} = V1{4};
+ let Inst{10} = V2{4};
+ let Inst{9-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+// Depending on the instruction mnemonic, certain bits may be or-ed into
+// the M4 value provided as an explicit operand. These bits are passed as m4or.
+class InstVRRa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern,
+ bits<4> m4or = 0>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<5> V2;
+ bits<4> M3;
+ bits<4> M4;
+ bits<4> M5;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = V2{3-0};
+ let Inst{31-24} = 0;
+ let Inst{23-20} = M5;
+ let Inst{19} = !if (!eq (m4or{3}, 1), 1, M4{3});
+ let Inst{18} = !if (!eq (m4or{2}, 1), 1, M4{2});
+ let Inst{17} = !if (!eq (m4or{1}, 1), 1, M4{1});
+ let Inst{16} = !if (!eq (m4or{0}, 1), 1, M4{0});
+ let Inst{15-12} = M3;
+ let Inst{11} = V1{4};
+ let Inst{10} = V2{4};
+ let Inst{9-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
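+
+// As an illustration: a subclass that passes m4or = 8 (hypothetical) forces
+// Inst{19}, the top bit of the encoded M4 field, to 1 regardless of the
+// value of the $M4 operand, while the remaining three bits of the operand
+// are encoded unchanged.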
+
+// Depending on the instruction mnemonic, certain bits may be or-ed into
+// the M5 value provided as an explicit operand. These bits are passed as m5or.
+class InstVRRb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern,
+ bits<4> m5or = 0>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<5> V2;
+ bits<5> V3;
+ bits<4> M4;
+ bits<4> M5;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = V2{3-0};
+ let Inst{31-28} = V3{3-0};
+ let Inst{27-24} = 0;
+ let Inst{23} = !if (!eq (m5or{3}, 1), 1, M5{3});
+ let Inst{22} = !if (!eq (m5or{2}, 1), 1, M5{2});
+ let Inst{21} = !if (!eq (m5or{1}, 1), 1, M5{1});
+ let Inst{20} = !if (!eq (m5or{0}, 1), 1, M5{0});
+ let Inst{19-16} = 0;
+ let Inst{15-12} = M4;
+ let Inst{11} = V1{4};
+ let Inst{10} = V2{4};
+ let Inst{9} = V3{4};
+ let Inst{8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRRc<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<5> V2;
+ bits<5> V3;
+ bits<4> M4;
+ bits<4> M5;
+ bits<4> M6;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = V2{3-0};
+ let Inst{31-28} = V3{3-0};
+ let Inst{27-24} = 0;
+ let Inst{23-20} = M6;
+ let Inst{19-16} = M5;
+ let Inst{15-12} = M4;
+ let Inst{11} = V1{4};
+ let Inst{10} = V2{4};
+ let Inst{9} = V3{4};
+ let Inst{8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+// Depending on the instruction mnemonic, certain bits may be or-ed into
+// the M6 value provided as an explicit operand. These bits are passed as m6or.
+class InstVRRd<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern,
+ bits<4> m6or = 0>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<5> V2;
+ bits<5> V3;
+ bits<5> V4;
+ bits<4> M5;
+ bits<4> M6;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = V2{3-0};
+ let Inst{31-28} = V3{3-0};
+ let Inst{27-24} = M5;
+ let Inst{23} = !if (!eq (m6or{3}, 1), 1, M6{3});
+ let Inst{22} = !if (!eq (m6or{2}, 1), 1, M6{2});
+ let Inst{21} = !if (!eq (m6or{1}, 1), 1, M6{1});
+ let Inst{20} = !if (!eq (m6or{0}, 1), 1, M6{0});
+ let Inst{19-16} = 0;
+ let Inst{15-12} = V4{3-0};
+ let Inst{11} = V1{4};
+ let Inst{10} = V2{4};
+ let Inst{9} = V3{4};
+ let Inst{8} = V4{4};
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRRe<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<5> V2;
+ bits<5> V3;
+ bits<5> V4;
+ bits<4> M5;
+ bits<4> M6;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = V2{3-0};
+ let Inst{31-28} = V3{3-0};
+ let Inst{27-24} = M6;
+ let Inst{23-20} = 0;
+ let Inst{19-16} = M5;
+ let Inst{15-12} = V4{3-0};
+ let Inst{11} = V1{4};
+ let Inst{10} = V2{4};
+ let Inst{9} = V3{4};
+ let Inst{8} = V4{4};
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRRf<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<4> R2;
+ bits<4> R3;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = R2;
+ let Inst{31-28} = R3;
+ let Inst{27-12} = 0;
+ let Inst{11} = V1{4};
+ let Inst{10-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRSa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<16> BD2;
+ bits<5> V3;
+ bits<4> M4;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = V3{3-0};
+ let Inst{31-16} = BD2;
+ let Inst{15-12} = M4;
+ let Inst{11} = V1{4};
+ let Inst{10} = V3{4};
+ let Inst{9-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRSb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<16> BD2;
+ bits<4> R3;
+ bits<4> M4;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = R3;
+ let Inst{31-16} = BD2;
+ let Inst{15-12} = M4;
+ let Inst{11} = V1{4};
+ let Inst{10-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRSc<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> R1;
+ bits<16> BD2;
+ bits<5> V3;
+ bits<4> M4;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = R1;
+ let Inst{35-32} = V3{3-0};
+ let Inst{31-16} = BD2;
+ let Inst{15-12} = M4;
+ let Inst{11} = 0;
+ let Inst{10} = V3{4};
+ let Inst{9-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRV<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<21> VBD2;
+ bits<4> M3;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-16} = VBD2{19-0};
+ let Inst{15-12} = M3;
+ let Inst{11} = V1{4};
+ let Inst{10} = VBD2{20};
+ let Inst{9-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRX<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<20> XBD2;
+ bits<4> M3;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-16} = XBD2;
+ let Inst{15-12} = M3;
+ let Inst{11} = V1{4};
+ let Inst{10-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction classes for .insn directives
+//===----------------------------------------------------------------------===//
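+
+// These classes back the assembler's .insn directive: the raw encoding is
+// supplied via the extra "enc" operand and copied into the opcode fields
+// that a normal instruction definition would hard-code.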
+
+class DirectiveInsnE<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstE<0, outs, ins, asmstr, pattern> {
+ bits<16> enc;
+
+ let Inst = enc;
+}
+
+class DirectiveInsnRI<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstRIa<0, outs, ins, asmstr, pattern> {
+ bits<32> enc;
+
+ let Inst{31-24} = enc{31-24};
+ let Inst{19-16} = enc{19-16};
+}
+
+class DirectiveInsnRIE<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstRIEd<0, outs, ins, asmstr, pattern> {
+ bits<48> enc;
+
+ let Inst{47-40} = enc{47-40};
+ let Inst{7-0} = enc{7-0};
+}
+
+class DirectiveInsnRIL<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstRILa<0, outs, ins, asmstr, pattern> {
+ bits<48> enc;
+ string type;
+
+ let Inst{47-40} = enc{47-40};
+ let Inst{35-32} = enc{35-32};
+}
+
+class DirectiveInsnRIS<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstRIS<0, outs, ins, asmstr, pattern> {
+ bits<48> enc;
+
+ let Inst{47-40} = enc{47-40};
+ let Inst{7-0} = enc{7-0};
+}
+
+class DirectiveInsnRR<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstRR<0, outs, ins, asmstr, pattern> {
+ bits<16> enc;
+
+ let Inst{15-8} = enc{15-8};
+}
+
+class DirectiveInsnRRE<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstRRE<0, outs, ins, asmstr, pattern> {
+ bits<32> enc;
+
+ let Inst{31-16} = enc{31-16};
+}
+
+class DirectiveInsnRRF<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstRRFa<0, outs, ins, asmstr, pattern> {
+ bits<32> enc;
+
+ let Inst{31-16} = enc{31-16};
+}
+
+class DirectiveInsnRRS<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstRRS<0, outs, ins, asmstr, pattern> {
+ bits<48> enc;
+
+ let Inst{47-40} = enc{47-40};
+ let Inst{7-0} = enc{7-0};
+}
+
+class DirectiveInsnRS<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstRSa<0, outs, ins, asmstr, pattern> {
+ bits<32> enc;
+
+ let Inst{31-24} = enc{31-24};
+}
+
+// RSE is like RSY except with a 12-bit displacement (instead of a 20-bit one).
+class DirectiveInsnRSE<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstRSYa<6, outs, ins, asmstr, pattern> {
+ bits<48> enc;
+
+ let Inst{47-40} = enc{47-40};
+ let Inst{31-16} = BD2{15-0};
+ let Inst{15-8} = 0;
+ let Inst{7-0} = enc{7-0};
+}
+
+class DirectiveInsnRSI<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstRSI<0, outs, ins, asmstr, pattern> {
+ bits<32> enc;
+
+ let Inst{31-24} = enc{31-24};
+}
+
+class DirectiveInsnRSY<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstRSYa<0, outs, ins, asmstr, pattern> {
+ bits<48> enc;
+
+ let Inst{47-40} = enc{47-40};
+ let Inst{7-0} = enc{7-0};
+}
+
+class DirectiveInsnRX<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstRXa<0, outs, ins, asmstr, pattern> {
+ bits<32> enc;
+
+ let Inst{31-24} = enc{31-24};
+}
+
+class DirectiveInsnRXE<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstRXE<0, outs, ins, asmstr, pattern> {
+ bits<48> enc;
+
+ let M3 = 0;
+
+ let Inst{47-40} = enc{47-40};
+ let Inst{7-0} = enc{7-0};
+}
+
+class DirectiveInsnRXF<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstRXF<0, outs, ins, asmstr, pattern> {
+ bits<48> enc;
+
+ let Inst{47-40} = enc{47-40};
+ let Inst{7-0} = enc{7-0};
+}
+
+class DirectiveInsnRXY<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstRXYa<0, outs, ins, asmstr, pattern> {
+ bits<48> enc;
+
+ let Inst{47-40} = enc{47-40};
+ let Inst{7-0} = enc{7-0};
+}
+
+class DirectiveInsnS<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstS<0, outs, ins, asmstr, pattern> {
+ bits<32> enc;
+
+ let Inst{31-16} = enc{31-16};
+}
+
+class DirectiveInsnSI<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSI<0, outs, ins, asmstr, pattern> {
+ bits<32> enc;
+
+ let Inst{31-24} = enc{31-24};
+}
+
+class DirectiveInsnSIY<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSIY<0, outs, ins, asmstr, pattern> {
+ bits<48> enc;
+
+ let Inst{47-40} = enc{47-40};
+ let Inst{7-0} = enc{7-0};
+}
+
+class DirectiveInsnSIL<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSIL<0, outs, ins, asmstr, pattern> {
+ bits<48> enc;
+
+ let Inst{47-32} = enc{47-32};
+}
+
+class DirectiveInsnSS<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSSd<0, outs, ins, asmstr, pattern> {
+ bits<48> enc;
+
+ let Inst{47-40} = enc{47-40};
+}
+
+class DirectiveInsnSSE<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSSE<0, outs, ins, asmstr, pattern> {
+ bits<48> enc;
+
+ let Inst{47-32} = enc{47-32};
+}
+
+class DirectiveInsnSSF<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSSF<0, outs, ins, asmstr, pattern> {
+ bits<48> enc;
+
+ let Inst{47-40} = enc{47-40};
+ let Inst{35-32} = enc{35-32};
+}
+
+//===----------------------------------------------------------------------===//
+// Variants of instructions with condition mask
+//===----------------------------------------------------------------------===//
+//
+// For instructions using a condition mask (e.g. conditional branches,
+// compare-and-branch instructions, or conditional move instructions),
+// we generally need to create multiple instruction patterns:
+//
+// - One used for code generation, which encodes the condition mask as an
+// MI operand, but writes out an extended mnemonic for better readability.
+// - One pattern for the base form of the instruction with an explicit
+// condition mask (encoded as a plain integer MI operand).
+// - Specific patterns for each extended mnemonic, where the condition mask
+// is implied by the pattern name and not otherwise encoded at all.
+//
+// We need the latter primarily for the assembler and disassembler, since the
+// assembler parser is not able to decode part of an instruction mnemonic
+// into an operand. Thus we provide separate patterns for each mnemonic.
+//
+// Note that in some cases there are two different mnemonics for the same
+// condition mask. In this case we cannot have both instructions available
+// to the disassembler at the same time since the encodings are not distinct.
+// Therefore the alternate forms are marked isAsmParserOnly.
+//
+// We don't make one of the two names an alias of the other because
+// we need the custom parsing routines to select the correct register class.
+//
+// This section provides helpers for generating the specific forms.
+//
+//===----------------------------------------------------------------------===//
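+
+// As a schematic sketch of the three kinds of patterns (simplified; the
+// real definitions appear later in this file and in SystemZInstrInfo.td):
+//
+//   def BRC    : CondBranchRI<"j#", 0xA74>;      // codegen form, prints j${M1}
+//   def BRCAsm : AsmCondBranchRI<"brc", 0xA74>;  // explicit integer mask
+//   def JE     : FixedCondBranchRI<CondVariantE, "j#", 0xA74>; // mask fixed to 8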
+
+// A class to describe a variant of an instruction with condition mask.
+class CondVariant<bits<4> ccmaskin, string suffixin, bit alternatein> {
+ // The fixed condition mask to use.
+ bits<4> ccmask = ccmaskin;
+
+ // The suffix to use for the extended assembler mnemonic.
+ string suffix = suffixin;
+
+ // Whether this is an alternate that needs to be marked isAsmParserOnly.
+ bit alternate = alternatein;
+}
+
+// Condition mask 15 means "always true", which is used to define
+// unconditional branches as a variant of conditional branches.
+def CondAlways : CondVariant<15, "", 0>;
+
+// Condition masks for general instructions that can set all 4 bits.
+def CondVariantO : CondVariant<1, "o", 0>;
+def CondVariantH : CondVariant<2, "h", 0>;
+def CondVariantP : CondVariant<2, "p", 1>;
+def CondVariantNLE : CondVariant<3, "nle", 0>;
+def CondVariantL : CondVariant<4, "l", 0>;
+def CondVariantM : CondVariant<4, "m", 1>;
+def CondVariantNHE : CondVariant<5, "nhe", 0>;
+def CondVariantLH : CondVariant<6, "lh", 0>;
+def CondVariantNE : CondVariant<7, "ne", 0>;
+def CondVariantNZ : CondVariant<7, "nz", 1>;
+def CondVariantE : CondVariant<8, "e", 0>;
+def CondVariantZ : CondVariant<8, "z", 1>;
+def CondVariantNLH : CondVariant<9, "nlh", 0>;
+def CondVariantHE : CondVariant<10, "he", 0>;
+def CondVariantNL : CondVariant<11, "nl", 0>;
+def CondVariantNM : CondVariant<11, "nm", 1>;
+def CondVariantLE : CondVariant<12, "le", 0>;
+def CondVariantNH : CondVariant<13, "nh", 0>;
+def CondVariantNP : CondVariant<13, "np", 1>;
+def CondVariantNO : CondVariant<14, "no", 0>;
+
+// A helper class to look up one of the above by name.
+class CV<string name>
+ : CondVariant<!cast<CondVariant>("CondVariant"#name).ccmask,
+ !cast<CondVariant>("CondVariant"#name).suffix,
+ !cast<CondVariant>("CondVariant"#name).alternate>;
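+
+// For example, CV<"E"> yields the same record contents as CondVariantE,
+// i.e. CondVariant<8, "e", 0>.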
+
+// Condition masks for integer instructions (e.g. compare-and-branch).
+// This is like the list above, except that condition code 3 is not possible
+// here, so the low bit of the mask is always 0. Masks differing only in that
+// bit are therefore equivalent, which means that each condition has two
+// names. Conditions "o" and "no" are not used.
+def IntCondVariantH : CondVariant<2, "h", 0>;
+def IntCondVariantNLE : CondVariant<2, "nle", 1>;
+def IntCondVariantL : CondVariant<4, "l", 0>;
+def IntCondVariantNHE : CondVariant<4, "nhe", 1>;
+def IntCondVariantLH : CondVariant<6, "lh", 0>;
+def IntCondVariantNE : CondVariant<6, "ne", 1>;
+def IntCondVariantE : CondVariant<8, "e", 0>;
+def IntCondVariantNLH : CondVariant<8, "nlh", 1>;
+def IntCondVariantHE : CondVariant<10, "he", 0>;
+def IntCondVariantNL : CondVariant<10, "nl", 1>;
+def IntCondVariantLE : CondVariant<12, "le", 0>;
+def IntCondVariantNH : CondVariant<12, "nh", 1>;
+
+// A helper class to look up one of the above by name.
+class ICV<string name>
+ : CondVariant<!cast<CondVariant>("IntCondVariant"#name).ccmask,
+ !cast<CondVariant>("IntCondVariant"#name).suffix,
+ !cast<CondVariant>("IntCondVariant"#name).alternate>;
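+
+// For example, ICV<"H"> and ICV<"NLE"> both yield condition mask 2;
+// "h" is the preferred spelling and "nle" is the isAsmParserOnly alternate.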
+
+//===----------------------------------------------------------------------===//
+// Instruction definitions with semantics
+//===----------------------------------------------------------------------===//
+//
+// These classes have the form [Cond]<Category><Format>, where <Format> is one
+// of the formats defined above and where <Category> describes the inputs
+// and outputs. "Cond" is used if the instruction is conditional,
+// in which case the 4-bit condition-code mask is added as a final operand.
+// <Category> can be one of:
+//
+// Inherent:
+// One register output operand and no input operands.
+//
+// InherentDual:
+// Two register output operands and no input operands.
+//
+// StoreInherent:
+// One address operand. The instruction stores to the address.
+//
+// SideEffectInherent:
+// No input or output operands, but causes some side effect.
+//
+// Branch:
+// One branch target. The instruction branches to the target.
+//
+// Call:
+// One output operand and one branch target. The instruction stores
+// the return address to the output operand and branches to the target.
+//
+// CmpBranch:
+// Two input operands and one optional branch target. The instruction
+// compares the two input operands and branches or traps on the result.
+//
+// BranchUnary:
+// One register output operand, one register input operand and one branch
+// target. The instruction stores a modified form of the source register
+// in the destination register and branches on the result.
+//
+// BranchBinary:
+// One register output operand, two register input operands and one branch
+// target. The instruction stores a modified form of one of the source
+// registers in the destination register and branches on the result.
+//
+// LoadMultiple:
+// One address input operand and two explicit output operands.
+// The instruction loads a range of registers from the address,
+// with the explicit operands giving the first and last register
+// to load. Other loaded registers are added as implicit definitions.
+//
+// StoreMultiple:
+// Two explicit input register operands and an address operand.
+// The instruction stores a range of registers to the address,
+// with the explicit operands giving the first and last register
+// to store. Other stored registers are added as implicit uses.
+//
+// StoreLength:
+// One value operand, one length operand and one address operand.
+// The instruction stores the value operand to the address but
+// doesn't write more than the number of bytes specified by the
+// length operand.
+//
+// LoadAddress:
+// One register output operand and one address operand.
+//
+// SideEffectAddress:
+// One address operand. No output operands, but causes some side effect.
+//
+// Unary:
+// One register output operand and one input operand.
+//
+// Store:
+// One address operand and one other input operand. The instruction
+// stores to the address.
+//
+// SideEffectUnary:
+// One input operand. No output operands, but causes some side effect.
+//
+// Binary:
+// One register output operand and two input operands.
+//
+// StoreBinary:
+// One address operand and two other input operands. The instruction
+// stores to the address.
+//
+// SideEffectBinary:
+// Two input operands. No output operands, but causes some side effect.
+//
+// Compare:
+// Two input operands and an implicit CC output operand.
+//
+// Test:
+// One or two input operands and an implicit CC output operand. If
+// present, the second input operand is an "address" operand used as
+// a test class mask.
+//
+// Ternary:
+// One register output operand and three input operands.
+//
+// SideEffectTernary:
+// Three input operands. No output operands, but causes some side effect.
+//
+// Quaternary:
+// One register output operand and four input operands.
+//
+// LoadAndOp:
+// One output operand and two input operands, one of which is an address.
+// The instruction both reads from and writes to the address.
+//
+// CmpSwap:
+// One output operand and three input operands, one of which is an address.
+// The instruction both reads from and writes to the address.
+//
+// RotateSelect:
+// One output operand and five input operands. The first two operands
+// are registers and the other three are immediates.
+//
+// Prefetch:
+// One 4-bit immediate operand and one address operand. The immediate
+// operand is 1 for a load prefetch and 2 for a store prefetch.
+//
+// BranchPreload:
+// One 4-bit immediate operand and two address operands.
+//
+// The format determines which input operands are tied to output operands,
+// and also determines the shape of any address operand.
+//
+// Multiclasses of the form <Category><Format>Pair define two instructions,
+// one with <Category><Format> and one with <Category><Format>Y. The name
+// of the first instruction has no suffix, the name of the second has
+// an extra "y".
+//
+//===----------------------------------------------------------------------===//
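+
+// For example, "CompareRRE" combines the Compare category (two inputs plus
+// an implicit CC output) with the RRE format, and "TernaryRXF" combines the
+// Ternary category (one register output, three inputs) with the RXF format.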
+
+class InherentRRE<string mnemonic, bits<16> opcode, RegisterOperand cls,
+ SDPatternOperator operator>
+ : InstRRE<opcode, (outs cls:$R1), (ins),
+ mnemonic#"\t$R1",
+ [(set cls:$R1, (operator))]> {
+ let R2 = 0;
+}
+
+class InherentDualRRE<string mnemonic, bits<16> opcode, RegisterOperand cls>
+ : InstRRE<opcode, (outs cls:$R1, cls:$R2), (ins),
+ mnemonic#"\t$R1, $R2", []>;
+
+class InherentVRIa<string mnemonic, bits<16> opcode, bits<16> value>
+ : InstVRIa<opcode, (outs VR128:$V1), (ins), mnemonic#"\t$V1", []> {
+ let I2 = value;
+ let M3 = 0;
+}
+
+class StoreInherentS<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator, bits<5> bytes>
+ : InstS<opcode, (outs), (ins bdaddr12only:$BD2),
+ mnemonic#"\t$BD2", [(operator bdaddr12only:$BD2)]> {
+ let mayStore = 1;
+ let AccessBytes = bytes;
+}
+
+class SideEffectInherentE<string mnemonic, bits<16>opcode>
+ : InstE<opcode, (outs), (ins), mnemonic, []>;
+
+class SideEffectInherentS<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator>
+ : InstS<opcode, (outs), (ins), mnemonic, [(operator)]> {
+ let BD2 = 0;
+}
+
+class SideEffectInherentRRE<string mnemonic, bits<16> opcode>
+ : InstRRE<opcode, (outs), (ins), mnemonic, []> {
+ let R1 = 0;
+ let R2 = 0;
+}
+
+// Allow an optional TLS marker symbol to generate TLS call relocations.
+class CallRI<string mnemonic, bits<12> opcode>
+ : InstRIb<opcode, (outs), (ins GR64:$R1, brtarget16tls:$RI2),
+ mnemonic#"\t$R1, $RI2", []>;
+
+// Allow an optional TLS marker symbol to generate TLS call relocations.
+class CallRIL<string mnemonic, bits<12> opcode>
+ : InstRILb<opcode, (outs), (ins GR64:$R1, brtarget32tls:$RI2),
+ mnemonic#"\t$R1, $RI2", []>;
+
+class CallRR<string mnemonic, bits<8> opcode>
+ : InstRR<opcode, (outs), (ins GR64:$R1, ADDR64:$R2),
+ mnemonic#"\t$R1, $R2", []>;
+
+class CallRX<string mnemonic, bits<8> opcode>
+ : InstRXa<opcode, (outs), (ins GR64:$R1, bdxaddr12only:$XBD2),
+ mnemonic#"\t$R1, $XBD2", []>;
+
+class CondBranchRI<string mnemonic, bits<12> opcode,
+ SDPatternOperator operator = null_frag>
+ : InstRIc<opcode, (outs), (ins cond4:$valid, cond4:$M1, brtarget16:$RI2),
+ !subst("#", "${M1}", mnemonic)#"\t$RI2",
+ [(operator cond4:$valid, cond4:$M1, bb:$RI2)]> {
+ let CCMaskFirst = 1;
+}
+
+class AsmCondBranchRI<string mnemonic, bits<12> opcode>
+ : InstRIc<opcode, (outs), (ins imm32zx4:$M1, brtarget16:$RI2),
+ mnemonic#"\t$M1, $RI2", []>;
+
+class FixedCondBranchRI<CondVariant V, string mnemonic, bits<12> opcode,
+ SDPatternOperator operator = null_frag>
+ : InstRIc<opcode, (outs), (ins brtarget16:$RI2),
+ !subst("#", V.suffix, mnemonic)#"\t$RI2", [(operator bb:$RI2)]> {
+ let isAsmParserOnly = V.alternate;
+ let M1 = V.ccmask;
+}
+
+class CondBranchRIL<string mnemonic, bits<12> opcode>
+ : InstRILc<opcode, (outs), (ins cond4:$valid, cond4:$M1, brtarget32:$RI2),
+ !subst("#", "${M1}", mnemonic)#"\t$RI2", []> {
+ let CCMaskFirst = 1;
+}
+
+class AsmCondBranchRIL<string mnemonic, bits<12> opcode>
+ : InstRILc<opcode, (outs), (ins imm32zx4:$M1, brtarget32:$RI2),
+ mnemonic#"\t$M1, $RI2", []>;
+
+class FixedCondBranchRIL<CondVariant V, string mnemonic, bits<12> opcode>
+ : InstRILc<opcode, (outs), (ins brtarget32:$RI2),
+ !subst("#", V.suffix, mnemonic)#"\t$RI2", []> {
+ let isAsmParserOnly = V.alternate;
+ let M1 = V.ccmask;
+}
+
+class CondBranchRR<string mnemonic, bits<8> opcode>
+ : InstRR<opcode, (outs), (ins cond4:$valid, cond4:$R1, GR64:$R2),
+ !subst("#", "${R1}", mnemonic)#"\t$R2", []> {
+ let CCMaskFirst = 1;
+}
+
+class AsmCondBranchRR<string mnemonic, bits<8> opcode>
+ : InstRR<opcode, (outs), (ins imm32zx4:$R1, GR64:$R2),
+ mnemonic#"\t$R1, $R2", []>;
+
+class FixedCondBranchRR<CondVariant V, string mnemonic, bits<8> opcode,
+ SDPatternOperator operator = null_frag>
+ : InstRR<opcode, (outs), (ins ADDR64:$R2),
+ !subst("#", V.suffix, mnemonic)#"\t$R2", [(operator ADDR64:$R2)]> {
+ let isAsmParserOnly = V.alternate;
+ let R1 = V.ccmask;
+}
+
+class CondBranchRX<string mnemonic, bits<8> opcode>
+ : InstRXb<opcode, (outs), (ins cond4:$valid, cond4:$M1, bdxaddr12only:$XBD2),
+ !subst("#", "${M1}", mnemonic)#"\t$XBD2", []> {
+ let CCMaskFirst = 1;
+}
+
+class AsmCondBranchRX<string mnemonic, bits<8> opcode>
+ : InstRXb<opcode, (outs), (ins imm32zx4:$M1, bdxaddr12only:$XBD2),
+ mnemonic#"\t$M1, $XBD2", []>;
+
+class FixedCondBranchRX<CondVariant V, string mnemonic, bits<8> opcode>
+ : InstRXb<opcode, (outs), (ins bdxaddr12only:$XBD2),
+ !subst("#", V.suffix, mnemonic)#"\t$XBD2", []> {
+ let isAsmParserOnly = V.alternate;
+ let M1 = V.ccmask;
+}
+
+class CmpBranchRIEa<string mnemonic, bits<16> opcode,
+ RegisterOperand cls, Immediate imm>
+ : InstRIEa<opcode, (outs), (ins cls:$R1, imm:$I2, cond4:$M3),
+ mnemonic#"$M3\t$R1, $I2", []>;
+
+class AsmCmpBranchRIEa<string mnemonic, bits<16> opcode,
+ RegisterOperand cls, Immediate imm>
+ : InstRIEa<opcode, (outs), (ins cls:$R1, imm:$I2, imm32zx4:$M3),
+ mnemonic#"\t$R1, $I2, $M3", []>;
+
+class FixedCmpBranchRIEa<CondVariant V, string mnemonic, bits<16> opcode,
+ RegisterOperand cls, Immediate imm>
+ : InstRIEa<opcode, (outs), (ins cls:$R1, imm:$I2),
+ mnemonic#V.suffix#"\t$R1, $I2", []> {
+ let isAsmParserOnly = V.alternate;
+ let M3 = V.ccmask;
+}
+
+multiclass CmpBranchRIEaPair<string mnemonic, bits<16> opcode,
+ RegisterOperand cls, Immediate imm> {
+ let isCodeGenOnly = 1 in
+ def "" : CmpBranchRIEa<mnemonic, opcode, cls, imm>;
+ def Asm : AsmCmpBranchRIEa<mnemonic, opcode, cls, imm>;
+}
+
+class CmpBranchRIEb<string mnemonic, bits<16> opcode,
+ RegisterOperand cls>
+ : InstRIEb<opcode, (outs),
+ (ins cls:$R1, cls:$R2, cond4:$M3, brtarget16:$RI4),
+ mnemonic#"$M3\t$R1, $R2, $RI4", []>;
+
+class AsmCmpBranchRIEb<string mnemonic, bits<16> opcode,
+ RegisterOperand cls>
+ : InstRIEb<opcode, (outs),
+ (ins cls:$R1, cls:$R2, imm32zx4:$M3, brtarget16:$RI4),
+ mnemonic#"\t$R1, $R2, $M3, $RI4", []>;
+
+class FixedCmpBranchRIEb<CondVariant V, string mnemonic, bits<16> opcode,
+ RegisterOperand cls>
+ : InstRIEb<opcode, (outs), (ins cls:$R1, cls:$R2, brtarget16:$RI4),
+ mnemonic#V.suffix#"\t$R1, $R2, $RI4", []> {
+ let isAsmParserOnly = V.alternate;
+ let M3 = V.ccmask;
+}
+
+multiclass CmpBranchRIEbPair<string mnemonic, bits<16> opcode,
+ RegisterOperand cls> {
+ let isCodeGenOnly = 1 in
+ def "" : CmpBranchRIEb<mnemonic, opcode, cls>;
+ def Asm : AsmCmpBranchRIEb<mnemonic, opcode, cls>;
+}
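+
+// A pair like the one above would be instantiated along these lines
+// (hypothetical operands):
+//
+//   defm CRJ : CmpBranchRIEbPair<"crj", 0xEC76, GR32>;
+//
+// giving a codegen-only CRJ that carries a cond4 operand plus a CRJAsm
+// form that takes the condition mask as a plain integer operand.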
+
+class CmpBranchRIEc<string mnemonic, bits<16> opcode,
+ RegisterOperand cls, Immediate imm>
+ : InstRIEc<opcode, (outs),
+ (ins cls:$R1, imm:$I2, cond4:$M3, brtarget16:$RI4),
+ mnemonic#"$M3\t$R1, $I2, $RI4", []>;
+
+class AsmCmpBranchRIEc<string mnemonic, bits<16> opcode,
+ RegisterOperand cls, Immediate imm>
+ : InstRIEc<opcode, (outs),
+ (ins cls:$R1, imm:$I2, imm32zx4:$M3, brtarget16:$RI4),
+ mnemonic#"\t$R1, $I2, $M3, $RI4", []>;
+
+class FixedCmpBranchRIEc<CondVariant V, string mnemonic, bits<16> opcode,
+ RegisterOperand cls, Immediate imm>
+ : InstRIEc<opcode, (outs), (ins cls:$R1, imm:$I2, brtarget16:$RI4),
+ mnemonic#V.suffix#"\t$R1, $I2, $RI4", []> {
+ let isAsmParserOnly = V.alternate;
+ let M3 = V.ccmask;
+}
+
+multiclass CmpBranchRIEcPair<string mnemonic, bits<16> opcode,
+ RegisterOperand cls, Immediate imm> {
+ let isCodeGenOnly = 1 in
+ def "" : CmpBranchRIEc<mnemonic, opcode, cls, imm>;
+ def Asm : AsmCmpBranchRIEc<mnemonic, opcode, cls, imm>;
+}
+
+class CmpBranchRRFc<string mnemonic, bits<16> opcode,
+ RegisterOperand cls>
+ : InstRRFc<opcode, (outs), (ins cls:$R1, cls:$R2, cond4:$M3),
+ mnemonic#"$M3\t$R1, $R2", []>;
+
+class AsmCmpBranchRRFc<string mnemonic, bits<16> opcode,
+ RegisterOperand cls>
+ : InstRRFc<opcode, (outs), (ins cls:$R1, cls:$R2, imm32zx4:$M3),
+ mnemonic#"\t$R1, $R2, $M3", []>;
+
+multiclass CmpBranchRRFcPair<string mnemonic, bits<16> opcode,
+ RegisterOperand cls> {
+ let isCodeGenOnly = 1 in
+ def "" : CmpBranchRRFc<mnemonic, opcode, cls>;
+ def Asm : AsmCmpBranchRRFc<mnemonic, opcode, cls>;
+}
+
+class FixedCmpBranchRRFc<CondVariant V, string mnemonic, bits<16> opcode,
+ RegisterOperand cls>
+ : InstRRFc<opcode, (outs), (ins cls:$R1, cls:$R2),
+ mnemonic#V.suffix#"\t$R1, $R2", []> {
+ let isAsmParserOnly = V.alternate;
+ let M3 = V.ccmask;
+}
+
+class CmpBranchRRS<string mnemonic, bits<16> opcode,
+ RegisterOperand cls>
+ : InstRRS<opcode, (outs),
+ (ins cls:$R1, cls:$R2, cond4:$M3, bdaddr12only:$BD4),
+ mnemonic#"$M3\t$R1, $R2, $BD4", []>;
+
+class AsmCmpBranchRRS<string mnemonic, bits<16> opcode,
+ RegisterOperand cls>
+ : InstRRS<opcode, (outs),
+ (ins cls:$R1, cls:$R2, imm32zx4:$M3, bdaddr12only:$BD4),
+ mnemonic#"\t$R1, $R2, $M3, $BD4", []>;
+
+class FixedCmpBranchRRS<CondVariant V, string mnemonic, bits<16> opcode,
+ RegisterOperand cls>
+ : InstRRS<opcode, (outs), (ins cls:$R1, cls:$R2, bdaddr12only:$BD4),
+ mnemonic#V.suffix#"\t$R1, $R2, $BD4", []> {
+ let isAsmParserOnly = V.alternate;
+ let M3 = V.ccmask;
+}
+
+multiclass CmpBranchRRSPair<string mnemonic, bits<16> opcode,
+ RegisterOperand cls> {
+ let isCodeGenOnly = 1 in
+ def "" : CmpBranchRRS<mnemonic, opcode, cls>;
+ def Asm : AsmCmpBranchRRS<mnemonic, opcode, cls>;
+}
+
+class CmpBranchRIS<string mnemonic, bits<16> opcode,
+ RegisterOperand cls, Immediate imm>
+ : InstRIS<opcode, (outs),
+ (ins cls:$R1, imm:$I2, cond4:$M3, bdaddr12only:$BD4),
+ mnemonic#"$M3\t$R1, $I2, $BD4", []>;
+
+class AsmCmpBranchRIS<string mnemonic, bits<16> opcode,
+ RegisterOperand cls, Immediate imm>
+ : InstRIS<opcode, (outs),
+ (ins cls:$R1, imm:$I2, imm32zx4:$M3, bdaddr12only:$BD4),
+ mnemonic#"\t$R1, $I2, $M3, $BD4", []>;
+
+class FixedCmpBranchRIS<CondVariant V, string mnemonic, bits<16> opcode,
+ RegisterOperand cls, Immediate imm>
+ : InstRIS<opcode, (outs), (ins cls:$R1, imm:$I2, bdaddr12only:$BD4),
+ mnemonic#V.suffix#"\t$R1, $I2, $BD4", []> {
+ let isAsmParserOnly = V.alternate;
+ let M3 = V.ccmask;
+}
+
+multiclass CmpBranchRISPair<string mnemonic, bits<16> opcode,
+ RegisterOperand cls, Immediate imm> {
+ let isCodeGenOnly = 1 in
+ def "" : CmpBranchRIS<mnemonic, opcode, cls, imm>;
+ def Asm : AsmCmpBranchRIS<mnemonic, opcode, cls, imm>;
+}
+
+class CmpBranchRSYb<string mnemonic, bits<16> opcode,
+ RegisterOperand cls>
+ : InstRSYb<opcode, (outs), (ins cls:$R1, bdaddr20only:$BD2, cond4:$M3),
+ mnemonic#"$M3\t$R1, $BD2", []>;
+
+class AsmCmpBranchRSYb<string mnemonic, bits<16> opcode,
+ RegisterOperand cls>
+ : InstRSYb<opcode, (outs), (ins cls:$R1, bdaddr20only:$BD2, imm32zx4:$M3),
+ mnemonic#"\t$R1, $M3, $BD2", []>;
+
+multiclass CmpBranchRSYbPair<string mnemonic, bits<16> opcode,
+ RegisterOperand cls> {
+ let isCodeGenOnly = 1 in
+ def "" : CmpBranchRSYb<mnemonic, opcode, cls>;
+ def Asm : AsmCmpBranchRSYb<mnemonic, opcode, cls>;
+}
+
+class FixedCmpBranchRSYb<CondVariant V, string mnemonic, bits<16> opcode,
+ RegisterOperand cls>
+ : InstRSYb<opcode, (outs), (ins cls:$R1, bdaddr20only:$BD2),
+ mnemonic#V.suffix#"\t$R1, $BD2", []> {
+ let isAsmParserOnly = V.alternate;
+ let M3 = V.ccmask;
+}
+
+class BranchUnaryRI<string mnemonic, bits<12> opcode, RegisterOperand cls>
+ : InstRIb<opcode, (outs cls:$R1), (ins cls:$R1src, brtarget16:$RI2),
+ mnemonic##"\t$R1, $RI2", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
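+
+// For example (opcode shown for illustration), branch-on-count is declared
+// with this class roughly as
+//   def BRCT : BranchUnaryRI<"brct", 0xA76, GR32>;
+// The tied $R1src models the read-modify-write of the count register while
+// staying out of the encoding.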
+
+class BranchUnaryRIL<string mnemonic, bits<12> opcode, RegisterOperand cls>
+ : InstRILb<opcode, (outs cls:$R1), (ins cls:$R1src, brtarget32:$RI2),
+ mnemonic##"\t$R1, $RI2", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+class BranchUnaryRR<string mnemonic, bits<8> opcode, RegisterOperand cls>
+ : InstRR<opcode, (outs cls:$R1), (ins cls:$R1src, GR64:$R2),
+ mnemonic##"\t$R1, $R2", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+class BranchUnaryRRE<string mnemonic, bits<16> opcode, RegisterOperand cls>
+ : InstRRE<opcode, (outs cls:$R1), (ins cls:$R1src, GR64:$R2),
+ mnemonic##"\t$R1, $R2", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+class BranchUnaryRX<string mnemonic, bits<8> opcode, RegisterOperand cls>
+ : InstRXa<opcode, (outs cls:$R1), (ins cls:$R1src, bdxaddr12only:$XBD2),
+ mnemonic##"\t$R1, $XBD2", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+class BranchUnaryRXY<string mnemonic, bits<16> opcode, RegisterOperand cls>
+ : InstRXYa<opcode, (outs cls:$R1), (ins cls:$R1src, bdxaddr20only:$XBD2),
+ mnemonic##"\t$R1, $XBD2", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+class BranchBinaryRSI<string mnemonic, bits<8> opcode, RegisterOperand cls>
+ : InstRSI<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, brtarget16:$RI2),
+ mnemonic##"\t$R1, $R3, $RI2", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+class BranchBinaryRIEe<string mnemonic, bits<16> opcode, RegisterOperand cls>
+ : InstRIEe<opcode, (outs cls:$R1),
+ (ins cls:$R1src, cls:$R3, brtarget16:$RI2),
+ mnemonic##"\t$R1, $R3, $RI2", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+class BranchBinaryRS<string mnemonic, bits<8> opcode, RegisterOperand cls>
+ : InstRSa<opcode, (outs cls:$R1),
+ (ins cls:$R1src, cls:$R3, bdaddr12only:$BD2),
+ mnemonic##"\t$R1, $R3, $BD2", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+class BranchBinaryRSY<string mnemonic, bits<16> opcode, RegisterOperand cls>
+ : InstRSYa<opcode,
+ (outs cls:$R1), (ins cls:$R1src, cls:$R3, bdaddr20only:$BD2),
+ mnemonic##"\t$R1, $R3, $BD2", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+class LoadMultipleRS<string mnemonic, bits<8> opcode, RegisterOperand cls,
+ AddressingMode mode = bdaddr12only>
+ : InstRSa<opcode, (outs cls:$R1, cls:$R3), (ins mode:$BD2),
+ mnemonic#"\t$R1, $R3, $BD2", []> {
+ let mayLoad = 1;
+}
+
+class LoadMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls,
+ AddressingMode mode = bdaddr20only>
+ : InstRSYa<opcode, (outs cls:$R1, cls:$R3), (ins mode:$BD2),
+ mnemonic#"\t$R1, $R3, $BD2", []> {
+ let mayLoad = 1;
+}
+
+multiclass LoadMultipleRSPair<string mnemonic, bits<8> rsOpcode,
+ bits<16> rsyOpcode, RegisterOperand cls> {
+ let DispKey = mnemonic ## #cls in {
+ let DispSize = "12" in
+ def "" : LoadMultipleRS<mnemonic, rsOpcode, cls, bdaddr12pair>;
+ let DispSize = "20" in
+ def Y : LoadMultipleRSY<mnemonic#"y", rsyOpcode, cls, bdaddr20pair>;
+ }
+}
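+
+// For example, the 32-bit load-multiple pair is declared roughly as
+//   defm LM : LoadMultipleRSPair<"lm", 0x98, 0xEB98, GR32>;
+// expanding to LM (12-bit unsigned displacement) and LMY (20-bit signed
+// displacement); DispKey/DispSize relate the two forms so a displacement
+// that does not fit the short form can be rewritten to the long one.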
+
+class LoadMultipleSSe<string mnemonic, bits<8> opcode, RegisterOperand cls>
+ : InstSSe<opcode, (outs cls:$R1, cls:$R3),
+ (ins bdaddr12only:$BD2, bdaddr12only:$BD4),
+ mnemonic#"\t$R1, $R3, $BD2, $BD4", []> {
+ let mayLoad = 1;
+}
+
+class LoadMultipleVRSa<string mnemonic, bits<16> opcode>
+ : InstVRSa<opcode, (outs VR128:$V1, VR128:$V3), (ins bdaddr12only:$BD2),
+ mnemonic#"\t$V1, $V3, $BD2", []> {
+ let M4 = 0;
+ let mayLoad = 1;
+}
+
+class StoreRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator,
+ RegisterOperand cls>
+ : InstRILb<opcode, (outs), (ins cls:$R1, pcrel32:$RI2),
+ mnemonic#"\t$R1, $RI2",
+ [(operator cls:$R1, pcrel32:$RI2)]> {
+ let mayStore = 1;
+ // We want PC-relative addresses to be tried ahead of BD and BDX addresses.
+ // However, BDXs have two extra operands and are therefore 6 units more
+ // complex.
+ let AddedComplexity = 7;
+}
+
+class StoreRX<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+ RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode = bdxaddr12only>
+ : InstRXa<opcode, (outs), (ins cls:$R1, mode:$XBD2),
+ mnemonic#"\t$R1, $XBD2",
+ [(operator cls:$R1, mode:$XBD2)]> {
+ let OpKey = mnemonic#"r"#cls;
+ let OpType = "mem";
+ let mayStore = 1;
+ let AccessBytes = bytes;
+}
+
+class StoreRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode = bdxaddr20only>
+ : InstRXYa<opcode, (outs), (ins cls:$R1, mode:$XBD2),
+ mnemonic#"\t$R1, $XBD2",
+ [(operator cls:$R1, mode:$XBD2)]> {
+ let OpKey = mnemonic#"r"#cls;
+ let OpType = "mem";
+ let mayStore = 1;
+ let AccessBytes = bytes;
+}
+
+multiclass StoreRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,
+ SDPatternOperator operator, RegisterOperand cls,
+ bits<5> bytes> {
+ let DispKey = mnemonic ## #cls in {
+ let DispSize = "12" in
+ def "" : StoreRX<mnemonic, rxOpcode, operator, cls, bytes, bdxaddr12pair>;
+ let DispSize = "20" in
+ def Y : StoreRXY<mnemonic#"y", rxyOpcode, operator, cls, bytes,
+ bdxaddr20pair>;
+ }
+}
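+
+// For example, the 32-bit store pair is declared roughly as
+//   defm ST : StoreRXPair<"st", 0x50, 0xE350, store, GR32, 4>;
+// giving ST (12-bit displacement) and STY (20-bit displacement).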
+
+class StoreVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr, bits<5> bytes, bits<4> type = 0>
+ : InstVRX<opcode, (outs), (ins tr.op:$V1, bdxaddr12only:$XBD2),
+ mnemonic#"\t$V1, $XBD2",
+            [(operator (tr.vt tr.op:$V1), bdxaddr12only:$XBD2)]> {
+ let M3 = type;
+ let mayStore = 1;
+ let AccessBytes = bytes;
+}
+
+class StoreLengthVRSb<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator, bits<5> bytes>
+ : InstVRSb<opcode, (outs), (ins VR128:$V1, GR32:$R3, bdaddr12only:$BD2),
+ mnemonic#"\t$V1, $R3, $BD2",
+ [(operator VR128:$V1, GR32:$R3, bdaddr12only:$BD2)]> {
+ let M4 = 0;
+ let mayStore = 1;
+ let AccessBytes = bytes;
+}
+
+class StoreMultipleRS<string mnemonic, bits<8> opcode, RegisterOperand cls,
+ AddressingMode mode = bdaddr12only>
+ : InstRSa<opcode, (outs), (ins cls:$R1, cls:$R3, mode:$BD2),
+ mnemonic#"\t$R1, $R3, $BD2", []> {
+ let mayStore = 1;
+}
+
+class StoreMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls,
+ AddressingMode mode = bdaddr20only>
+ : InstRSYa<opcode, (outs), (ins cls:$R1, cls:$R3, mode:$BD2),
+ mnemonic#"\t$R1, $R3, $BD2", []> {
+ let mayStore = 1;
+}
+
+multiclass StoreMultipleRSPair<string mnemonic, bits<8> rsOpcode,
+ bits<16> rsyOpcode, RegisterOperand cls> {
+ let DispKey = mnemonic ## #cls in {
+ let DispSize = "12" in
+ def "" : StoreMultipleRS<mnemonic, rsOpcode, cls, bdaddr12pair>;
+ let DispSize = "20" in
+ def Y : StoreMultipleRSY<mnemonic#"y", rsyOpcode, cls, bdaddr20pair>;
+ }
+}
+
+class StoreMultipleVRSa<string mnemonic, bits<16> opcode>
+ : InstVRSa<opcode, (outs), (ins VR128:$V1, VR128:$V3, bdaddr12only:$BD2),
+ mnemonic#"\t$V1, $V3, $BD2", []> {
+ let M4 = 0;
+ let mayStore = 1;
+}
+
+// StoreSI* instructions are used to store an integer to memory, but the
+// addresses are more restricted than for normal stores. If we are in the
+// situation of having to force either the address into a register or the
+// constant into a register, it's usually better to do the latter.
+// We therefore match the address in the same way as a normal store and
+// only use the StoreSI* instruction if the matched address is suitable.
+class StoreSI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+ Immediate imm>
+ : InstSI<opcode, (outs), (ins mviaddr12pair:$BD1, imm:$I2),
+ mnemonic#"\t$BD1, $I2",
+ [(operator imm:$I2, mviaddr12pair:$BD1)]> {
+ let mayStore = 1;
+}
+
+class StoreSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ Immediate imm>
+ : InstSIY<opcode, (outs), (ins mviaddr20pair:$BD1, imm:$I2),
+ mnemonic#"\t$BD1, $I2",
+ [(operator imm:$I2, mviaddr20pair:$BD1)]> {
+ let mayStore = 1;
+}
+
+class StoreSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ Immediate imm>
+ : InstSIL<opcode, (outs), (ins mviaddr12pair:$BD1, imm:$I2),
+ mnemonic#"\t$BD1, $I2",
+ [(operator imm:$I2, mviaddr12pair:$BD1)]> {
+ let mayStore = 1;
+}
+
+multiclass StoreSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode,
+ SDPatternOperator operator, Immediate imm> {
+ let DispKey = mnemonic in {
+ let DispSize = "12" in
+ def "" : StoreSI<mnemonic, siOpcode, operator, imm>;
+ let DispSize = "20" in
+ def Y : StoreSIY<mnemonic#"y", siyOpcode, operator, imm>;
+ }
+}
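+
+// For example, move-immediate is declared roughly as
+//   defm MVI : StoreSIPair<"mvi", 0x92, 0xEB52, truncstorei8, imm32zx8trunc>;
+// so, per the comment above, an 8-bit constant store uses MVI/MVIY only when
+// the address was already matched in the restricted SI/SIY form.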
+
+class StoreSSE<string mnemonic, bits<16> opcode>
+ : InstSSE<opcode, (outs), (ins bdaddr12only:$BD1, bdaddr12only:$BD2),
+ mnemonic#"\t$BD1, $BD2", []> {
+ let mayStore = 1;
+}
+
+class CondStoreRSY<string mnemonic, bits<16> opcode,
+ RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode = bdaddr20only>
+ : InstRSYb<opcode, (outs), (ins cls:$R1, mode:$BD2, cond4:$valid, cond4:$M3),
+ mnemonic#"$M3\t$R1, $BD2", []> {
+ let mayStore = 1;
+ let AccessBytes = bytes;
+ let CCMaskLast = 1;
+}
+
+// Like CondStoreRSY, but used for the raw assembly form. The condition-code
+// mask is the third operand rather than being part of the mnemonic.
+class AsmCondStoreRSY<string mnemonic, bits<16> opcode,
+ RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode = bdaddr20only>
+ : InstRSYb<opcode, (outs), (ins cls:$R1, mode:$BD2, imm32zx4:$M3),
+ mnemonic#"\t$R1, $BD2, $M3", []> {
+ let mayStore = 1;
+ let AccessBytes = bytes;
+}
+
+// Like CondStoreRSY, but with a fixed CC mask.
+class FixedCondStoreRSY<CondVariant V, string mnemonic, bits<16> opcode,
+ RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode = bdaddr20only>
+ : InstRSYb<opcode, (outs), (ins cls:$R1, mode:$BD2),
+ mnemonic#V.suffix#"\t$R1, $BD2", []> {
+ let mayStore = 1;
+ let AccessBytes = bytes;
+ let isAsmParserOnly = V.alternate;
+ let M3 = V.ccmask;
+}
+
+multiclass CondStoreRSYPair<string mnemonic, bits<16> opcode,
+ RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode = bdaddr20only> {
+ let isCodeGenOnly = 1 in
+ def "" : CondStoreRSY<mnemonic, opcode, cls, bytes, mode>;
+ def Asm : AsmCondStoreRSY<mnemonic, opcode, cls, bytes, mode>;
+}
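+
+// For example (opcode shown for illustration), store-on-condition is
+// declared roughly as
+//   defm STOC : CondStoreRSYPair<"stoc", 0xEBF3, GR32, 4>;
+// producing the codegen-only STOC (condition folded into the mnemonic) and
+// STOCAsm (explicit $M3 mask).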
+
+class SideEffectUnaryI<string mnemonic, bits<8> opcode, Immediate imm>
+ : InstI<opcode, (outs), (ins imm:$I1),
+ mnemonic#"\t$I1", []>;
+
+class SideEffectUnaryRR<string mnemonic, bits<8> opcode, RegisterOperand cls>
+ : InstRR<opcode, (outs), (ins cls:$R1),
+ mnemonic#"\t$R1", []> {
+ let R2 = 0;
+}
+
+class SideEffectUnaryRRE<string mnemonic, bits<16> opcode, RegisterOperand cls,
+ SDPatternOperator operator>
+ : InstRRE<opcode, (outs), (ins cls:$R1),
+ mnemonic#"\t$R1", [(operator cls:$R1)]> {
+ let R2 = 0;
+}
+
+class SideEffectUnaryS<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator, bits<5> bytes,
+ AddressingMode mode = bdaddr12only>
+ : InstS<opcode, (outs), (ins mode:$BD2),
+ mnemonic#"\t$BD2", [(operator mode:$BD2)]> {
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+}
+
+class SideEffectAddressS<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator,
+ AddressingMode mode = bdaddr12only>
+ : InstS<opcode, (outs), (ins mode:$BD2),
+ mnemonic#"\t$BD2", [(operator mode:$BD2)]>;
+
+class LoadAddressRX<string mnemonic, bits<8> opcode,
+ SDPatternOperator operator, AddressingMode mode>
+ : InstRXa<opcode, (outs GR64:$R1), (ins mode:$XBD2),
+ mnemonic#"\t$R1, $XBD2",
+ [(set GR64:$R1, (operator mode:$XBD2))]>;
+
+class LoadAddressRXY<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator, AddressingMode mode>
+ : InstRXYa<opcode, (outs GR64:$R1), (ins mode:$XBD2),
+ mnemonic#"\t$R1, $XBD2",
+ [(set GR64:$R1, (operator mode:$XBD2))]>;
+
+multiclass LoadAddressRXPair<string mnemonic, bits<8> rxOpcode,
+ bits<16> rxyOpcode, SDPatternOperator operator> {
+ let DispKey = mnemonic in {
+ let DispSize = "12" in
+ def "" : LoadAddressRX<mnemonic, rxOpcode, operator, laaddr12pair>;
+ let DispSize = "20" in
+ def Y : LoadAddressRXY<mnemonic#"y", rxyOpcode, operator, laaddr20pair>;
+ }
+}
+
+class LoadAddressRIL<string mnemonic, bits<12> opcode,
+ SDPatternOperator operator>
+ : InstRILb<opcode, (outs GR64:$R1), (ins pcrel32:$RI2),
+ mnemonic#"\t$R1, $RI2",
+ [(set GR64:$R1, (operator pcrel32:$RI2))]>;
+
+class UnaryRR<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRR<opcode, (outs cls1:$R1), (ins cls2:$R2),
+ mnemonic#"\t$R1, $R2",
+ [(set cls1:$R1, (operator cls2:$R2))]> {
+ let OpKey = mnemonic#cls1;
+ let OpType = "reg";
+}
+
+class UnaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRRE<opcode, (outs cls1:$R1), (ins cls2:$R2),
+ mnemonic#"\t$R1, $R2",
+ [(set cls1:$R1, (operator cls2:$R2))]> {
+ let OpKey = mnemonic#cls1;
+ let OpType = "reg";
+}
+
+class UnaryTiedRRE<string mnemonic, bits<16> opcode, RegisterOperand cls>
+ : InstRRE<opcode, (outs cls:$R1), (ins cls:$R1src),
+ mnemonic#"\t$R1", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let R2 = 0;
+}
+
+class UnaryMemRRFc<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRRFc<opcode, (outs cls2:$R2, cls1:$R1), (ins cls1:$R1src),
+ mnemonic#"\t$R1, $R2", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let M3 = 0;
+}
+
+class UnaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
+ RegisterOperand cls, Immediate imm>
+ : InstRIa<opcode, (outs cls:$R1), (ins imm:$I2),
+ mnemonic#"\t$R1, $I2",
+ [(set cls:$R1, (operator imm:$I2))]>;
+
+class UnaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator,
+ RegisterOperand cls, Immediate imm>
+ : InstRILa<opcode, (outs cls:$R1), (ins imm:$I2),
+ mnemonic#"\t$R1, $I2",
+ [(set cls:$R1, (operator imm:$I2))]>;
+
+class UnaryRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator,
+ RegisterOperand cls>
+ : InstRILb<opcode, (outs cls:$R1), (ins pcrel32:$RI2),
+ mnemonic#"\t$R1, $RI2",
+ [(set cls:$R1, (operator pcrel32:$RI2))]> {
+ let mayLoad = 1;
+ // We want PC-relative addresses to be tried ahead of BD and BDX addresses.
+ // However, BDXs have two extra operands and are therefore 6 units more
+ // complex.
+ let AddedComplexity = 7;
+}
+
+class CondUnaryRSY<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator, RegisterOperand cls,
+ bits<5> bytes, AddressingMode mode = bdaddr20only>
+ : InstRSYb<opcode, (outs cls:$R1),
+ (ins cls:$R1src, mode:$BD2, cond4:$valid, cond4:$M3),
+ mnemonic#"$M3\t$R1, $BD2",
+ [(set cls:$R1,
+                   (z_select_ccmask (operator mode:$BD2), cls:$R1src,
+                                    cond4:$valid, cond4:$M3))]> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+ let CCMaskLast = 1;
+}
+
+// Like CondUnaryRSY, but used for the raw assembly form. The condition-code
+// mask is the third operand rather than being part of the mnemonic.
+class AsmCondUnaryRSY<string mnemonic, bits<16> opcode,
+ RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode = bdaddr20only>
+ : InstRSYb<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$BD2, imm32zx4:$M3),
+ mnemonic#"\t$R1, $BD2, $M3", []> {
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+// Like CondUnaryRSY, but with a fixed CC mask.
+class FixedCondUnaryRSY<CondVariant V, string mnemonic, bits<16> opcode,
+ RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode = bdaddr20only>
+ : InstRSYb<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$BD2),
+ mnemonic#V.suffix#"\t$R1, $BD2", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+ let isAsmParserOnly = V.alternate;
+ let M3 = V.ccmask;
+}
+
+multiclass CondUnaryRSYPair<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator,
+ RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode = bdaddr20only> {
+ let isCodeGenOnly = 1 in
+ def "" : CondUnaryRSY<mnemonic, opcode, operator, cls, bytes, mode>;
+ def Asm : AsmCondUnaryRSY<mnemonic, opcode, cls, bytes, mode>;
+}
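+
+// For example (opcode shown for illustration), load-on-condition is declared
+// roughly as
+//   defm LOC : CondUnaryRSYPair<"loc", 0xEBF2, nonvolatile_load, GR32, 4>;
+// with FixedCondUnaryRSY supplying single-condition mnemonics such as "loce"
+// for the individual CondVariant masks.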
+
+class UnaryRX<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+ RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode = bdxaddr12only>
+ : InstRXa<opcode, (outs cls:$R1), (ins mode:$XBD2),
+ mnemonic#"\t$R1, $XBD2",
+ [(set cls:$R1, (operator mode:$XBD2))]> {
+ let OpKey = mnemonic#"r"#cls;
+ let OpType = "mem";
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+}
+
+class UnaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls, bits<5> bytes>
+ : InstRXE<opcode, (outs cls:$R1), (ins bdxaddr12only:$XBD2),
+ mnemonic#"\t$R1, $XBD2",
+ [(set cls:$R1, (operator bdxaddr12only:$XBD2))]> {
+ let OpKey = mnemonic#"r"#cls;
+ let OpType = "mem";
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+ let M3 = 0;
+}
+
+class UnaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode = bdxaddr20only>
+ : InstRXYa<opcode, (outs cls:$R1), (ins mode:$XBD2),
+ mnemonic#"\t$R1, $XBD2",
+ [(set cls:$R1, (operator mode:$XBD2))]> {
+ let OpKey = mnemonic#"r"#cls;
+ let OpType = "mem";
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+}
+
+multiclass UnaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,
+ SDPatternOperator operator, RegisterOperand cls,
+ bits<5> bytes> {
+ let DispKey = mnemonic ## #cls in {
+ let DispSize = "12" in
+ def "" : UnaryRX<mnemonic, rxOpcode, operator, cls, bytes, bdxaddr12pair>;
+ let DispSize = "20" in
+ def Y : UnaryRXY<mnemonic#"y", rxyOpcode, operator, cls, bytes,
+ bdxaddr20pair>;
+ }
+}
+
+class UnaryVRIa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr, Immediate imm, bits<4> type = 0>
+ : InstVRIa<opcode, (outs tr.op:$V1), (ins imm:$I2),
+ mnemonic#"\t$V1, $I2",
+ [(set tr.op:$V1, (tr.vt (operator imm:$I2)))]> {
+ let M3 = type;
+}
+
+class UnaryVRIaGeneric<string mnemonic, bits<16> opcode, Immediate imm>
+ : InstVRIa<opcode, (outs VR128:$V1), (ins imm:$I2, imm32zx4:$M3),
+ mnemonic#"\t$V1, $I2, $M3", []>;
+
+class UnaryVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m4 = 0,
+ bits<4> m5 = 0>
+ : InstVRRa<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2),
+ mnemonic#"\t$V1, $V2",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2))))]> {
+ let M3 = type;
+ let M4 = m4;
+ let M5 = m5;
+}
+
+class UnaryVRRaGeneric<string mnemonic, bits<16> opcode, bits<4> m4 = 0,
+ bits<4> m5 = 0>
+ : InstVRRa<opcode, (outs VR128:$V1), (ins VR128:$V2, imm32zx4:$M3),
+ mnemonic#"\t$V1, $V2, $M3", []> {
+ let M4 = m4;
+ let M5 = m5;
+}
+
+class UnaryVRRaFloatGeneric<string mnemonic, bits<16> opcode, bits<4> m5 = 0>
+ : InstVRRa<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, imm32zx4:$M3, imm32zx4:$M4),
+ mnemonic#"\t$V1, $V2, $M3, $M4", []> {
+ let M5 = m5;
+}
+
+// Declare a pair of instructions, one which sets CC and one which doesn't.
+// The CC-setting form ends with "S" and sets the low bit of M5.
+// The form that does not set CC has an extra operand to optionally allow
+// specifying arbitrary M5 values in assembler.
+multiclass UnaryExtraVRRaSPair<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator,
+ SDPatternOperator operator_cc,
+ TypedReg tr1, TypedReg tr2, bits<4> type> {
+ let M3 = type, M4 = 0 in
+ def "" : InstVRRa<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V2, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $M5", []>;
+ def : Pat<(tr1.vt (operator (tr2.vt tr2.op:$V2))),
+ (!cast<Instruction>(NAME) tr2.op:$V2, 0)>;
+ def : InstAlias<mnemonic#"\t$V1, $V2",
+ (!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2, 0)>;
+ let Defs = [CC] in
+ def S : UnaryVRRa<mnemonic##"s", opcode, operator_cc, tr1, tr2,
+ type, 0, 1>;
+}
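+
+// A hypothetical "defm VFOO : UnaryExtraVRRaSPair<...>" therefore expands to
+// three things: VFOO with an explicit $M5 operand, an InstAlias accepting a
+// plain "vfoo $V1, $V2" (M5 = 0), and a CC-setting VFOOS with the low M5 bit
+// hard-wired to 1.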
+
+multiclass UnaryExtraVRRaSPairGeneric<string mnemonic, bits<16> opcode> {
+ let M4 = 0 in
+ def "" : InstVRRa<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, imm32zx4:$M3, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $M3, $M5", []>;
+ def : InstAlias<mnemonic#"\t$V1, $V2, $M3",
+ (!cast<Instruction>(NAME) VR128:$V1, VR128:$V2,
+ imm32zx4:$M3, 0)>;
+}
+
+class UnaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr, bits<5> bytes, bits<4> type = 0>
+ : InstVRX<opcode, (outs tr.op:$V1), (ins bdxaddr12only:$XBD2),
+ mnemonic#"\t$V1, $XBD2",
+ [(set tr.op:$V1, (tr.vt (operator bdxaddr12only:$XBD2)))]> {
+ let M3 = type;
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+}
+
+class UnaryVRXGeneric<string mnemonic, bits<16> opcode>
+ : InstVRX<opcode, (outs VR128:$V1), (ins bdxaddr12only:$XBD2, imm32zx4:$M3),
+ mnemonic#"\t$V1, $XBD2, $M3", []> {
+ let mayLoad = 1;
+}
+
+class SideEffectBinaryRX<string mnemonic, bits<8> opcode,
+ RegisterOperand cls>
+ : InstRXa<opcode, (outs), (ins cls:$R1, bdxaddr12only:$XBD2),
+ mnemonic##"\t$R1, $XBD2", []>;
+
+class SideEffectBinaryRILPC<string mnemonic, bits<12> opcode,
+ RegisterOperand cls>
+ : InstRILb<opcode, (outs), (ins cls:$R1, pcrel32:$RI2),
+ mnemonic##"\t$R1, $RI2", []> {
+ // We want PC-relative addresses to be tried ahead of BD and BDX addresses.
+ // However, BDXs have two extra operands and are therefore 6 units more
+ // complex.
+ let AddedComplexity = 7;
+}
+
+class SideEffectBinaryRRE<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRRE<opcode, (outs), (ins cls1:$R1, cls2:$R2),
+ mnemonic#"\t$R1, $R2", []>;
+
+class SideEffectBinaryRRFa<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRRFa<opcode, (outs), (ins cls1:$R1, cls2:$R2),
+ mnemonic#"\t$R1, $R2", []> {
+ let R3 = 0;
+ let M4 = 0;
+}
+
+class SideEffectBinaryRRFc<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRRFc<opcode, (outs), (ins cls1:$R1, cls2:$R2),
+ mnemonic#"\t$R1, $R2", []> {
+ let M3 = 0;
+}
+
+class SideEffectBinaryIE<string mnemonic, bits<16> opcode,
+ Immediate imm1, Immediate imm2>
+ : InstIE<opcode, (outs), (ins imm1:$I1, imm2:$I2),
+ mnemonic#"\t$I1, $I2", []>;
+
+class SideEffectBinarySI<string mnemonic, bits<8> opcode, Operand imm>
+ : InstSI<opcode, (outs), (ins bdaddr12only:$BD1, imm:$I2),
+ mnemonic#"\t$BD1, $I2", []>;
+
+class SideEffectBinarySIL<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator, Immediate imm>
+ : InstSIL<opcode, (outs), (ins bdaddr12only:$BD1, imm:$I2),
+ mnemonic#"\t$BD1, $I2", [(operator bdaddr12only:$BD1, imm:$I2)]>;
+
+class SideEffectBinarySSa<string mnemonic, bits<8> opcode>
+ : InstSSa<opcode, (outs), (ins bdladdr12onlylen8:$BDL1, bdaddr12only:$BD2),
+ mnemonic##"\t$BDL1, $BD2", []>;
+
+class SideEffectBinarySSb<string mnemonic, bits<8> opcode>
+ : InstSSb<opcode,
+ (outs), (ins bdladdr12onlylen4:$BDL1, bdladdr12onlylen4:$BDL2),
+ mnemonic##"\t$BDL1, $BDL2", []>;
+
+class SideEffectBinarySSf<string mnemonic, bits<8> opcode>
+ : InstSSf<opcode, (outs), (ins bdaddr12only:$BD1, bdladdr12onlylen8:$BDL2),
+ mnemonic##"\t$BD1, $BDL2", []>;
+
+class SideEffectBinarySSE<string mnemonic, bits<16> opcode>
+ : InstSSE<opcode, (outs), (ins bdaddr12only:$BD1, bdaddr12only:$BD2),
+ mnemonic#"\t$BD1, $BD2", []>;
+
+class SideEffectBinaryMemMemRR<string mnemonic, bits<8> opcode,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRR<opcode, (outs cls1:$R1, cls2:$R2), (ins cls1:$R1src, cls2:$R2src),
+ mnemonic#"\t$R1, $R2", []> {
+ let Constraints = "$R1 = $R1src, $R2 = $R2src";
+ let DisableEncoding = "$R1src, $R2src";
+}
+
+class SideEffectBinaryMemRRE<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRRE<opcode, (outs cls2:$R2), (ins cls1:$R1, cls2:$R2src),
+ mnemonic#"\t$R1, $R2", []> {
+ let Constraints = "$R2 = $R2src";
+ let DisableEncoding = "$R2src";
+}
+
+class SideEffectBinaryMemMemRRE<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRRE<opcode, (outs cls1:$R1, cls2:$R2), (ins cls1:$R1src, cls2:$R2src),
+ mnemonic#"\t$R1, $R2", []> {
+ let Constraints = "$R1 = $R1src, $R2 = $R2src";
+ let DisableEncoding = "$R1src, $R2src";
+}
+
+class SideEffectBinaryMemMemRRFc<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRRFc<opcode, (outs cls1:$R1, cls2:$R2), (ins cls1:$R1src, cls2:$R2src),
+ mnemonic#"\t$R1, $R2", []> {
+ let Constraints = "$R1 = $R1src, $R2 = $R2src";
+ let DisableEncoding = "$R1src, $R2src";
+ let M3 = 0;
+}
+
+class BinaryRR<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRR<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2),
+ mnemonic#"\t$R1, $R2",
+ [(set cls1:$R1, (operator cls1:$R1src, cls2:$R2))]> {
+ let OpKey = mnemonic#cls1;
+ let OpType = "reg";
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+class BinaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRRE<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2),
+ mnemonic#"\t$R1, $R2",
+ [(set cls1:$R1, (operator cls1:$R1src, cls2:$R2))]> {
+ let OpKey = mnemonic#cls1;
+ let OpType = "reg";
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+class BinaryRRD<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRRD<opcode, (outs cls1:$R1), (ins cls2:$R3, cls2:$R2),
+ mnemonic#"\t$R1, $R3, $R2",
+ [(set cls1:$R1, (operator cls2:$R3, cls2:$R2))]> {
+  let OpKey = mnemonic#cls1;
+ let OpType = "reg";
+}
+
+class BinaryRRFa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls1, RegisterOperand cls2,
+ RegisterOperand cls3>
+ : InstRRFa<opcode, (outs cls1:$R1), (ins cls2:$R2, cls3:$R3),
+ mnemonic#"\t$R1, $R2, $R3",
+ [(set cls1:$R1, (operator cls2:$R2, cls3:$R3))]> {
+ let M4 = 0;
+}
+
+multiclass BinaryRRAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2,
+ SDPatternOperator operator, RegisterOperand cls1,
+ RegisterOperand cls2> {
+ let NumOpsKey = mnemonic in {
+ let NumOpsValue = "3" in
+ def K : BinaryRRFa<mnemonic#"k", opcode2, null_frag, cls1, cls1, cls2>,
+ Requires<[FeatureDistinctOps]>;
+ let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in
+ def "" : BinaryRR<mnemonic, opcode1, operator, cls1, cls2>;
+ }
+}
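+
+// For example (opcodes shown for illustration), 32-bit add is declared
+// roughly as
+//   defm AR : BinaryRRAndK<"ar", 0x1A, 0xB9F8, add, GR32, GR32>;
+// ARK requires FeatureDistinctOps; the two-operand AR remains the
+// pattern-matched form and is marked convertible to three-address so the
+// two-address pass can switch to ARK when the tied operands would otherwise
+// force a copy.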
+
+multiclass BinaryRREAndK<string mnemonic, bits<16> opcode1, bits<16> opcode2,
+ SDPatternOperator operator, RegisterOperand cls1,
+ RegisterOperand cls2> {
+ let NumOpsKey = mnemonic in {
+ let NumOpsValue = "3" in
+ def K : BinaryRRFa<mnemonic#"k", opcode2, null_frag, cls1, cls1, cls2>,
+ Requires<[FeatureDistinctOps]>;
+ let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in
+ def "" : BinaryRRE<mnemonic, opcode1, operator, cls1, cls2>;
+ }
+}
+
+class BinaryRRFb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls1, RegisterOperand cls2,
+ RegisterOperand cls3>
+ : InstRRFb<opcode, (outs cls1:$R1), (ins cls2:$R2, cls3:$R3),
+ mnemonic#"\t$R1, $R3, $R2",
+ [(set cls1:$R1, (operator cls2:$R2, cls3:$R3))]> {
+ let M4 = 0;
+}
+
+class BinaryMemRRFc<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2, Immediate imm>
+ : InstRRFc<opcode, (outs cls2:$R2, cls1:$R1), (ins cls1:$R1src, imm:$M3),
+ mnemonic#"\t$R1, $R2, $M3", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+multiclass BinaryMemRRFcOpt<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2> {
+ def "" : BinaryMemRRFc<mnemonic, opcode, cls1, cls2, imm32zx4>;
+ def Opt : UnaryMemRRFc<mnemonic, opcode, cls1, cls2>;
+}
+
+class BinaryRRFd<string mnemonic, bits<16> opcode, RegisterOperand cls1,
+ RegisterOperand cls2>
+ : InstRRFd<opcode, (outs cls1:$R1), (ins cls2:$R2, imm32zx4:$M4),
+ mnemonic#"\t$R1, $R2, $M4", []>;
+
+class BinaryRRFe<string mnemonic, bits<16> opcode, RegisterOperand cls1,
+ RegisterOperand cls2>
+ : InstRRFe<opcode, (outs cls1:$R1), (ins imm32zx4:$M3, cls2:$R2),
+ mnemonic#"\t$R1, $M3, $R2", []> {
+ let M4 = 0;
+}
+
+class CondBinaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
+ RegisterOperand cls2>
+ : InstRRFc<opcode, (outs cls1:$R1),
+ (ins cls1:$R1src, cls2:$R2, cond4:$valid, cond4:$M3),
+ mnemonic#"$M3\t$R1, $R2", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let CCMaskLast = 1;
+}
+
+// Like CondBinaryRRF, but used for the raw assembly form. The condition-code
+// mask is the third operand rather than being part of the mnemonic.
+class AsmCondBinaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
+ RegisterOperand cls2>
+ : InstRRFc<opcode, (outs cls1:$R1),
+ (ins cls1:$R1src, cls2:$R2, imm32zx4:$M3),
+ mnemonic#"\t$R1, $R2, $M3", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+// Like CondBinaryRRF, but with a fixed CC mask.
+class FixedCondBinaryRRF<CondVariant V, string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRRFc<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2),
+ mnemonic#V.suffix#"\t$R1, $R2", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let isAsmParserOnly = V.alternate;
+ let M3 = V.ccmask;
+}
+
+multiclass CondBinaryRRFPair<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2> {
+ let isCodeGenOnly = 1 in
+ def "" : CondBinaryRRF<mnemonic, opcode, cls1, cls2>;
+ def Asm : AsmCondBinaryRRF<mnemonic, opcode, cls1, cls2>;
+}
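+
+// For example (opcode shown for illustration), load-register-on-condition is
+// declared roughly as
+//   defm LOCR : CondBinaryRRFPair<"locr", 0xB9F2, GR32, GR32>;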
+
+class BinaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
+ RegisterOperand cls, Immediate imm>
+ : InstRIa<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
+ mnemonic#"\t$R1, $I2",
+ [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+class BinaryRIE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls, Immediate imm>
+ : InstRIEd<opcode, (outs cls:$R1), (ins cls:$R3, imm:$I2),
+ mnemonic#"\t$R1, $R3, $I2",
+ [(set cls:$R1, (operator cls:$R3, imm:$I2))]>;
+
+multiclass BinaryRIAndK<string mnemonic, bits<12> opcode1, bits<16> opcode2,
+ SDPatternOperator operator, RegisterOperand cls,
+ Immediate imm> {
+ let NumOpsKey = mnemonic in {
+ let NumOpsValue = "3" in
+ def K : BinaryRIE<mnemonic##"k", opcode2, null_frag, cls, imm>,
+ Requires<[FeatureDistinctOps]>;
+ let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in
+ def "" : BinaryRI<mnemonic, opcode1, operator, cls, imm>;
+ }
+}
+
+class CondBinaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls,
+ Immediate imm>
+ : InstRIEg<opcode, (outs cls:$R1),
+ (ins cls:$R1src, imm:$I2, cond4:$valid, cond4:$M3),
+ mnemonic#"$M3\t$R1, $I2",
+ [(set cls:$R1, (z_select_ccmask imm:$I2, cls:$R1src,
+ cond4:$valid, cond4:$M3))]> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let CCMaskLast = 1;
+}
+
+// Like CondBinaryRIE, but used for the raw assembly form. The condition-code
+// mask is the third operand rather than being part of the mnemonic.
+class AsmCondBinaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls,
+ Immediate imm>
+ : InstRIEg<opcode, (outs cls:$R1),
+ (ins cls:$R1src, imm:$I2, imm32zx4:$M3),
+ mnemonic#"\t$R1, $I2, $M3", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+// Like CondBinaryRIE, but with a fixed CC mask.
+class FixedCondBinaryRIE<CondVariant V, string mnemonic, bits<16> opcode,
+ RegisterOperand cls, Immediate imm>
+ : InstRIEg<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
+ mnemonic#V.suffix#"\t$R1, $I2", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let isAsmParserOnly = V.alternate;
+ let M3 = V.ccmask;
+}
+
+multiclass CondBinaryRIEPair<string mnemonic, bits<16> opcode,
+ RegisterOperand cls, Immediate imm> {
+ let isCodeGenOnly = 1 in
+ def "" : CondBinaryRIE<mnemonic, opcode, cls, imm>;
+ def Asm : AsmCondBinaryRIE<mnemonic, opcode, cls, imm>;
+}
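+
+// For example (opcode shown for illustration),
+// load-halfword-immediate-on-condition is declared roughly as
+//   defm LOCHI : CondBinaryRIEPair<"lochi", 0xEC42, GR32, imm32sx16>;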
+
+class BinaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator,
+ RegisterOperand cls, Immediate imm>
+ : InstRILa<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
+ mnemonic#"\t$R1, $I2",
+ [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+class BinaryRS<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+ RegisterOperand cls>
+ : InstRSa<opcode, (outs cls:$R1), (ins cls:$R1src, shift12only:$BD2),
+ mnemonic#"\t$R1, $BD2",
+ [(set cls:$R1, (operator cls:$R1src, shift12only:$BD2))]> {
+ let R3 = 0;
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+class BinaryRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls>
+ : InstRSYa<opcode, (outs cls:$R1), (ins cls:$R3, shift20only:$BD2),
+ mnemonic#"\t$R1, $R3, $BD2",
+ [(set cls:$R1, (operator cls:$R3, shift20only:$BD2))]>;
+
+multiclass BinaryRSAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2,
+ SDPatternOperator operator, RegisterOperand cls> {
+ let NumOpsKey = mnemonic in {
+ let NumOpsValue = "3" in
+ def K : BinaryRSY<mnemonic##"k", opcode2, null_frag, cls>,
+ Requires<[FeatureDistinctOps]>;
+ let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in
+ def "" : BinaryRS<mnemonic, opcode1, operator, cls>;
+ }
+}
+
+class BinaryRSL<string mnemonic, bits<16> opcode, RegisterOperand cls>
+ : InstRSLb<opcode, (outs cls:$R1),
+ (ins bdladdr12onlylen8:$BDL2, imm32zx4:$M3),
+ mnemonic#"\t$R1, $BDL2, $M3", []> {
+ let mayLoad = 1;
+}
+
+class BinaryRX<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+ RegisterOperand cls, SDPatternOperator load, bits<5> bytes,
+ AddressingMode mode = bdxaddr12only>
+ : InstRXa<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$XBD2),
+ mnemonic#"\t$R1, $XBD2",
+ [(set cls:$R1, (operator cls:$R1src, (load mode:$XBD2)))]> {
+ let OpKey = mnemonic#"r"#cls;
+ let OpType = "mem";
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+}
+
+class BinaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls, SDPatternOperator load, bits<5> bytes>
+ : InstRXE<opcode, (outs cls:$R1), (ins cls:$R1src, bdxaddr12only:$XBD2),
+ mnemonic#"\t$R1, $XBD2",
+ [(set cls:$R1, (operator cls:$R1src,
+ (load bdxaddr12only:$XBD2)))]> {
+ let OpKey = mnemonic#"r"#cls;
+ let OpType = "mem";
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+ let M3 = 0;
+}
+
+class BinaryRXF<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls1, RegisterOperand cls2,
+ SDPatternOperator load, bits<5> bytes>
+ : InstRXF<opcode, (outs cls1:$R1), (ins cls2:$R3, bdxaddr12only:$XBD2),
+ mnemonic#"\t$R1, $R3, $XBD2",
+ [(set cls1:$R1, (operator cls2:$R3, (load bdxaddr12only:$XBD2)))]> {
+  let OpKey = mnemonic#"r"#cls1;
+ let OpType = "mem";
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+}
+
+class BinaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls, SDPatternOperator load, bits<5> bytes,
+ AddressingMode mode = bdxaddr20only>
+ : InstRXYa<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$XBD2),
+ mnemonic#"\t$R1, $XBD2",
+ [(set cls:$R1, (operator cls:$R1src, (load mode:$XBD2)))]> {
+ let OpKey = mnemonic#"r"#cls;
+ let OpType = "mem";
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+}
+
+multiclass BinaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,
+ SDPatternOperator operator, RegisterOperand cls,
+ SDPatternOperator load, bits<5> bytes> {
+ let DispKey = mnemonic ## #cls in {
+ let DispSize = "12" in
+ def "" : BinaryRX<mnemonic, rxOpcode, operator, cls, load, bytes,
+ bdxaddr12pair>;
+ let DispSize = "20" in
+ def Y : BinaryRXY<mnemonic#"y", rxyOpcode, operator, cls, load, bytes,
+ bdxaddr20pair>;
+ }
+}
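+
+// For example, 32-bit add-from-memory is declared roughly as
+//   defm A : BinaryRXPair<"a", 0x5A, 0xE35A, add, GR32, load, 4>;
+// expanding to A and AY exactly as in the unary and store pairs above.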
+
+class BinarySI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+ Operand imm, AddressingMode mode = bdaddr12only>
+ : InstSI<opcode, (outs), (ins mode:$BD1, imm:$I2),
+ mnemonic#"\t$BD1, $I2",
+ [(store (operator (load mode:$BD1), imm:$I2), mode:$BD1)]> {
+ let mayLoad = 1;
+ let mayStore = 1;
+}
+
+class BinarySIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ Operand imm, AddressingMode mode = bdaddr20only>
+ : InstSIY<opcode, (outs), (ins mode:$BD1, imm:$I2),
+ mnemonic#"\t$BD1, $I2",
+ [(store (operator (load mode:$BD1), imm:$I2), mode:$BD1)]> {
+ let mayLoad = 1;
+ let mayStore = 1;
+}
+
+multiclass BinarySIPair<string mnemonic, bits<8> siOpcode,
+ bits<16> siyOpcode, SDPatternOperator operator,
+ Operand imm> {
+  let DispKey = mnemonic in {
+ let DispSize = "12" in
+ def "" : BinarySI<mnemonic, siOpcode, operator, imm, bdaddr12pair>;
+ let DispSize = "20" in
+ def Y : BinarySIY<mnemonic#"y", siyOpcode, operator, imm, bdaddr20pair>;
+ }
+}
+
+class BinarySSF<string mnemonic, bits<12> opcode, RegisterOperand cls>
+ : InstSSF<opcode, (outs cls:$R3), (ins bdaddr12pair:$BD1, bdaddr12pair:$BD2),
+ mnemonic#"\t$R3, $BD1, $BD2", []> {
+ let mayLoad = 1;
+}
+
+class BinaryVRIb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr, bits<4> type>
+ : InstVRIb<opcode, (outs tr.op:$V1), (ins imm32zx8:$I2, imm32zx8:$I3),
+ mnemonic#"\t$V1, $I2, $I3",
+ [(set tr.op:$V1, (tr.vt (operator imm32zx8:$I2, imm32zx8:$I3)))]> {
+ let M4 = type;
+}
+
+class BinaryVRIbGeneric<string mnemonic, bits<16> opcode>
+ : InstVRIb<opcode, (outs VR128:$V1),
+ (ins imm32zx8:$I2, imm32zx8:$I3, imm32zx4:$M4),
+ mnemonic#"\t$V1, $I2, $I3, $M4", []>;
+
+class BinaryVRIc<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> type>
+ : InstVRIc<opcode, (outs tr1.op:$V1), (ins tr2.op:$V3, imm32zx16:$I2),
+ mnemonic#"\t$V1, $V3, $I2",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V3),
+ imm32zx16:$I2)))]> {
+ let M4 = type;
+}
+
+class BinaryVRIcGeneric<string mnemonic, bits<16> opcode>
+ : InstVRIc<opcode, (outs VR128:$V1),
+ (ins VR128:$V3, imm32zx16:$I2, imm32zx4:$M4),
+ mnemonic#"\t$V1, $V3, $I2, $M4", []>;
+
+class BinaryVRIe<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> type, bits<4> m5>
+ : InstVRIe<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2, imm32zx12:$I3),
+ mnemonic#"\t$V1, $V2, $I3",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
+ imm32zx12:$I3)))]> {
+ let M4 = type;
+ let M5 = m5;
+}
+
+class BinaryVRIeFloatGeneric<string mnemonic, bits<16> opcode>
+ : InstVRIe<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, imm32zx12:$I3, imm32zx4:$M4, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $I3, $M4, $M5", []>;
+
+class BinaryVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m4 = 0>
+ : InstVRRa<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $M5",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
+                                                 imm32zx4:$M5)))]> {
+ let M3 = type;
+ let M4 = m4;
+}
+
+class BinaryVRRaFloatGeneric<string mnemonic, bits<16> opcode>
+ : InstVRRa<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, imm32zx4:$M3, imm32zx4:$M4, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $M3, $M4, $M5", []>;
+
+class BinaryVRRb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> type = 0,
+ bits<4> modifier = 0>
+ : InstVRRb<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2, tr2.op:$V3),
+ mnemonic#"\t$V1, $V2, $V3",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
+ (tr2.vt tr2.op:$V3))))]> {
+ let M4 = type;
+ let M5 = modifier;
+}
+
+// Declare a pair of instructions, one which sets CC and one which doesn't.
+// The CC-setting form ends with "S" and sets the low bit of M5.
+multiclass BinaryVRRbSPair<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator,
+ SDPatternOperator operator_cc, TypedReg tr1,
+ TypedReg tr2, bits<4> type, bits<4> modifier = 0> {
+ def "" : BinaryVRRb<mnemonic, opcode, operator, tr1, tr2, type,
+ !and (modifier, 14)>;
+ let Defs = [CC] in
+ def S : BinaryVRRb<mnemonic##"s", opcode, operator_cc, tr1, tr2, type,
+ !add (!and (modifier, 14), 1)>;
+}
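+
+// A hypothetical "defm VFOO : BinaryVRRbSPair<...>" with modifier m thus
+// yields VFOO with M5 = m & 14 (low bit clear, CC not set) and VFOOS with
+// M5 = (m & 14) + 1 (low bit set, CC defined).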
+
+class BinaryVRRbSPairGeneric<string mnemonic, bits<16> opcode>
+ : InstVRRb<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, VR128:$V3, imm32zx4:$M4, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $V3, $M4, $M5", []>;
+
+// Declare a pair of instructions, one which sets CC and one which doesn't.
+// The CC-setting form ends with "S" and sets the low bit of M5.
+// The form that does not set CC has an extra operand to optionally allow
+// specifying arbitrary M5 values in assembler.
+multiclass BinaryExtraVRRbSPair<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator,
+ SDPatternOperator operator_cc,
+ TypedReg tr1, TypedReg tr2, bits<4> type> {
+ let M4 = type in
+ def "" : InstVRRb<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V2, tr2.op:$V3, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $V3, $M5", []>;
+ def : Pat<(tr1.vt (operator (tr2.vt tr2.op:$V2), (tr2.vt tr2.op:$V3))),
+ (!cast<Instruction>(NAME) tr2.op:$V2, tr2.op:$V3, 0)>;
+ def : InstAlias<mnemonic#"\t$V1, $V2, $V3",
+ (!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2,
+ tr2.op:$V3, 0)>;
+ let Defs = [CC] in
+ def S : BinaryVRRb<mnemonic##"s", opcode, operator_cc, tr1, tr2, type, 1>;
+}
+
+multiclass BinaryExtraVRRbSPairGeneric<string mnemonic, bits<16> opcode> {
+ def "" : InstVRRb<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, VR128:$V3, imm32zx4:$M4, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $V3, $M4, $M5", []>;
+ def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $M4",
+ (!cast<Instruction>(NAME) VR128:$V1, VR128:$V2, VR128:$V3,
+ imm32zx4:$M4, 0)>;
+}
+
+class BinaryVRRc<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m5 = 0,
+ bits<4> m6 = 0>
+ : InstVRRc<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2, tr2.op:$V3),
+ mnemonic#"\t$V1, $V2, $V3",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
+ (tr2.vt tr2.op:$V3))))]> {
+ let M4 = type;
+ let M5 = m5;
+ let M6 = m6;
+}
+
+class BinaryVRRcGeneric<string mnemonic, bits<16> opcode, bits<4> m5 = 0,
+ bits<4> m6 = 0>
+ : InstVRRc<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, VR128:$V3, imm32zx4:$M4),
+ mnemonic#"\t$V1, $V2, $V3, $M4", []> {
+ let M5 = m5;
+ let M6 = m6;
+}
+
+class BinaryVRRcFloatGeneric<string mnemonic, bits<16> opcode, bits<4> m6 = 0>
+ : InstVRRc<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, VR128:$V3, imm32zx4:$M4, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $V3, $M4, $M5", []> {
+ let M6 = m6;
+}
+
+// Declare a pair of instructions, one which sets CC and one which doesn't.
+// The CC-setting form ends with "S" and sets the low bit of M5.
+multiclass BinaryVRRcSPair<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator,
+ SDPatternOperator operator_cc, TypedReg tr1,
+ TypedReg tr2, bits<4> type, bits<4> m5,
+ bits<4> modifier = 0> {
+ def "" : BinaryVRRc<mnemonic, opcode, operator, tr1, tr2, type,
+ m5, !and (modifier, 14)>;
+ let Defs = [CC] in
+ def S : BinaryVRRc<mnemonic##"s", opcode, operator_cc, tr1, tr2, type,
+ m5, !add (!and (modifier, 14), 1)>;
+}
+
+class BinaryVRRcSPairFloatGeneric<string mnemonic, bits<16> opcode>
+ : InstVRRc<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, VR128:$V3, imm32zx4:$M4, imm32zx4:$M5,
+ imm32zx4:$M6),
+ mnemonic#"\t$V1, $V2, $V3, $M4, $M5, $M6", []>;
+
+class BinaryVRRf<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr>
+ : InstVRRf<opcode, (outs tr.op:$V1), (ins GR64:$R2, GR64:$R3),
+ mnemonic#"\t$V1, $R2, $R3",
+ [(set tr.op:$V1, (tr.vt (operator GR64:$R2, GR64:$R3)))]>;
+
+class BinaryVRSa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> type>
+ : InstVRSa<opcode, (outs tr1.op:$V1), (ins tr2.op:$V3, shift12only:$BD2),
+ mnemonic#"\t$V1, $V3, $BD2",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V3),
+ shift12only:$BD2)))]> {
+ let M4 = type;
+}
+
+class BinaryVRSaGeneric<string mnemonic, bits<16> opcode>
+ : InstVRSa<opcode, (outs VR128:$V1),
+ (ins VR128:$V3, shift12only:$BD2, imm32zx4:$M4),
+ mnemonic#"\t$V1, $V3, $BD2, $M4", []>;
+
+class BinaryVRSb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ bits<5> bytes>
+ : InstVRSb<opcode, (outs VR128:$V1), (ins GR32:$R3, bdaddr12only:$BD2),
+ mnemonic#"\t$V1, $R3, $BD2",
+ [(set VR128:$V1, (operator GR32:$R3, bdaddr12only:$BD2))]> {
+ let M4 = 0;
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+}
+
+class BinaryVRSc<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr, bits<4> type>
+ : InstVRSc<opcode, (outs GR64:$R1), (ins tr.op:$V3, shift12only:$BD2),
+ mnemonic#"\t$R1, $V3, $BD2",
+ [(set GR64:$R1, (operator (tr.vt tr.op:$V3), shift12only:$BD2))]> {
+ let M4 = type;
+}
+
+class BinaryVRScGeneric<string mnemonic, bits<16> opcode>
+ : InstVRSc<opcode, (outs GR64:$R1),
+             (ins VR128:$V3, shift12only:$BD2, imm32zx4:$M4),
+ mnemonic#"\t$R1, $V3, $BD2, $M4", []>;
+
+class BinaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr, bits<5> bytes>
+ : InstVRX<opcode, (outs VR128:$V1), (ins bdxaddr12only:$XBD2, imm32zx4:$M3),
+ mnemonic#"\t$V1, $XBD2, $M3",
+ [(set tr.op:$V1, (tr.vt (operator bdxaddr12only:$XBD2,
+ imm32zx4:$M3)))]> {
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+}
+
+class StoreBinaryRS<string mnemonic, bits<8> opcode, RegisterOperand cls,
+ bits<5> bytes, AddressingMode mode = bdaddr12only>
+ : InstRSb<opcode, (outs), (ins cls:$R1, imm32zx4:$M3, mode:$BD2),
+ mnemonic#"\t$R1, $M3, $BD2", []> {
+ let mayStore = 1;
+ let AccessBytes = bytes;
+}
+
+class StoreBinaryRSY<string mnemonic, bits<16> opcode, RegisterOperand cls,
+ bits<5> bytes, AddressingMode mode = bdaddr20only>
+ : InstRSYb<opcode, (outs), (ins cls:$R1, imm32zx4:$M3, mode:$BD2),
+ mnemonic#"\t$R1, $M3, $BD2", []> {
+ let mayStore = 1;
+ let AccessBytes = bytes;
+}
+
+multiclass StoreBinaryRSPair<string mnemonic, bits<8> rsOpcode,
+ bits<16> rsyOpcode, RegisterOperand cls,
+ bits<5> bytes> {
+ let DispKey = mnemonic ## #cls in {
+ let DispSize = "12" in
+ def "" : StoreBinaryRS<mnemonic, rsOpcode, cls, bytes, bdaddr12pair>;
+ let DispSize = "20" in
+ def Y : StoreBinaryRSY<mnemonic#"y", rsyOpcode, cls, bytes,
+ bdaddr20pair>;
+ }
+}
+
+class StoreBinaryRSL<string mnemonic, bits<16> opcode, RegisterOperand cls>
+ : InstRSLb<opcode, (outs),
+ (ins cls:$R1, bdladdr12onlylen8:$BDL2, imm32zx4:$M3),
+ mnemonic#"\t$R1, $BDL2, $M3", []> {
+ let mayStore = 1;
+}
+
+class StoreBinaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes,
+ Immediate index>
+ : InstVRV<opcode, (outs), (ins VR128:$V1, bdvaddr12only:$VBD2, index:$M3),
+ mnemonic#"\t$V1, $VBD2, $M3", []> {
+ let mayStore = 1;
+ let AccessBytes = bytes;
+}
+
+class StoreBinaryVRX<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator, TypedReg tr, bits<5> bytes,
+ Immediate index>
+ : InstVRX<opcode, (outs), (ins tr.op:$V1, bdxaddr12only:$XBD2, index:$M3),
+ mnemonic#"\t$V1, $XBD2, $M3",
+ [(operator (tr.vt tr.op:$V1), bdxaddr12only:$XBD2, index:$M3)]> {
+ let mayStore = 1;
+ let AccessBytes = bytes;
+}
+
+class MemoryBinarySSd<string mnemonic, bits<8> opcode,
+ RegisterOperand cls>
+ : InstSSd<opcode, (outs),
+ (ins bdraddr12only:$RBD1, bdaddr12only:$BD2, cls:$R3),
+ mnemonic#"\t$RBD1, $BD2, $R3", []>;
+
+class CompareRR<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRR<opcode, (outs), (ins cls1:$R1, cls2:$R2),
+ mnemonic#"\t$R1, $R2",
+ [(operator cls1:$R1, cls2:$R2)]> {
+ let OpKey = mnemonic#cls1;
+ let OpType = "reg";
+ let isCompare = 1;
+}
+
+class CompareRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRRE<opcode, (outs), (ins cls1:$R1, cls2:$R2),
+ mnemonic#"\t$R1, $R2",
+ [(operator cls1:$R1, cls2:$R2)]> {
+ let OpKey = mnemonic#cls1;
+ let OpType = "reg";
+ let isCompare = 1;
+}
+
+class CompareRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
+ RegisterOperand cls, Immediate imm>
+ : InstRIa<opcode, (outs), (ins cls:$R1, imm:$I2),
+ mnemonic#"\t$R1, $I2",
+ [(operator cls:$R1, imm:$I2)]> {
+ let isCompare = 1;
+}
+
+class CompareRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator,
+ RegisterOperand cls, Immediate imm>
+ : InstRILa<opcode, (outs), (ins cls:$R1, imm:$I2),
+ mnemonic#"\t$R1, $I2",
+ [(operator cls:$R1, imm:$I2)]> {
+ let isCompare = 1;
+}
+
+class CompareRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator,
+ RegisterOperand cls, SDPatternOperator load>
+ : InstRILb<opcode, (outs), (ins cls:$R1, pcrel32:$RI2),
+ mnemonic#"\t$R1, $RI2",
+ [(operator cls:$R1, (load pcrel32:$RI2))]> {
+ let isCompare = 1;
+ let mayLoad = 1;
+ // We want PC-relative addresses to be tried ahead of BD and BDX addresses.
+ // However, BDXs have two extra operands and are therefore 6 units more
+ // complex.
+ let AddedComplexity = 7;
+}
+
+class CompareRX<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+ RegisterOperand cls, SDPatternOperator load, bits<5> bytes,
+ AddressingMode mode = bdxaddr12only>
+ : InstRXa<opcode, (outs), (ins cls:$R1, mode:$XBD2),
+ mnemonic#"\t$R1, $XBD2",
+ [(operator cls:$R1, (load mode:$XBD2))]> {
+ let OpKey = mnemonic#"r"#cls;
+ let OpType = "mem";
+ let isCompare = 1;
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+}
+
+class CompareRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls, SDPatternOperator load, bits<5> bytes>
+ : InstRXE<opcode, (outs), (ins cls:$R1, bdxaddr12only:$XBD2),
+ mnemonic#"\t$R1, $XBD2",
+ [(operator cls:$R1, (load bdxaddr12only:$XBD2))]> {
+ let OpKey = mnemonic#"r"#cls;
+ let OpType = "mem";
+ let isCompare = 1;
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+ let M3 = 0;
+}
+
+class CompareRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls, SDPatternOperator load, bits<5> bytes,
+ AddressingMode mode = bdxaddr20only>
+ : InstRXYa<opcode, (outs), (ins cls:$R1, mode:$XBD2),
+ mnemonic#"\t$R1, $XBD2",
+ [(operator cls:$R1, (load mode:$XBD2))]> {
+ let OpKey = mnemonic#"r"#cls;
+ let OpType = "mem";
+ let isCompare = 1;
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+}
+
+multiclass CompareRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,
+ SDPatternOperator operator, RegisterOperand cls,
+ SDPatternOperator load, bits<5> bytes> {
+ let DispKey = mnemonic ## #cls in {
+ let DispSize = "12" in
+ def "" : CompareRX<mnemonic, rxOpcode, operator, cls,
+ load, bytes, bdxaddr12pair>;
+ let DispSize = "20" in
+ def Y : CompareRXY<mnemonic#"y", rxyOpcode, operator, cls,
+ load, bytes, bdxaddr20pair>;
+ }
+}
+
+class CompareRS<string mnemonic, bits<8> opcode, RegisterOperand cls,
+ bits<5> bytes, AddressingMode mode = bdaddr12only>
+ : InstRSb<opcode, (outs), (ins cls:$R1, imm32zx4:$M3, mode:$BD2),
+ mnemonic#"\t$R1, $M3, $BD2", []> {
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+}
+
+class CompareRSY<string mnemonic, bits<16> opcode, RegisterOperand cls,
+ bits<5> bytes, AddressingMode mode = bdaddr20only>
+ : InstRSYb<opcode, (outs), (ins cls:$R1, imm32zx4:$M3, mode:$BD2),
+ mnemonic#"\t$R1, $M3, $BD2", []> {
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+}
+
+multiclass CompareRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode,
+ RegisterOperand cls, bits<5> bytes> {
+ let DispKey = mnemonic ## #cls in {
+ let DispSize = "12" in
+ def "" : CompareRS<mnemonic, rsOpcode, cls, bytes, bdaddr12pair>;
+ let DispSize = "20" in
+ def Y : CompareRSY<mnemonic#"y", rsyOpcode, cls, bytes, bdaddr20pair>;
+ }
+}
+
+class CompareSSb<string mnemonic, bits<8> opcode>
+ : InstSSb<opcode,
+ (outs), (ins bdladdr12onlylen4:$BDL1, bdladdr12onlylen4:$BDL2),
+ mnemonic##"\t$BDL1, $BDL2", []> {
+ let isCompare = 1;
+ let mayLoad = 1;
+}
+
+class CompareSI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+ SDPatternOperator load, Immediate imm,
+ AddressingMode mode = bdaddr12only>
+ : InstSI<opcode, (outs), (ins mode:$BD1, imm:$I2),
+ mnemonic#"\t$BD1, $I2",
+ [(operator (load mode:$BD1), imm:$I2)]> {
+ let isCompare = 1;
+ let mayLoad = 1;
+}
+
+class CompareSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ SDPatternOperator load, Immediate imm>
+ : InstSIL<opcode, (outs), (ins bdaddr12only:$BD1, imm:$I2),
+ mnemonic#"\t$BD1, $I2",
+ [(operator (load bdaddr12only:$BD1), imm:$I2)]> {
+ let isCompare = 1;
+ let mayLoad = 1;
+}
+
+class CompareSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ SDPatternOperator load, Immediate imm,
+ AddressingMode mode = bdaddr20only>
+ : InstSIY<opcode, (outs), (ins mode:$BD1, imm:$I2),
+ mnemonic#"\t$BD1, $I2",
+ [(operator (load mode:$BD1), imm:$I2)]> {
+ let isCompare = 1;
+ let mayLoad = 1;
+}
+
+multiclass CompareSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode,
+ SDPatternOperator operator, SDPatternOperator load,
+ Immediate imm> {
+ let DispKey = mnemonic in {
+ let DispSize = "12" in
+ def "" : CompareSI<mnemonic, siOpcode, operator, load, imm, bdaddr12pair>;
+ let DispSize = "20" in
+ def Y : CompareSIY<mnemonic#"y", siyOpcode, operator, load, imm,
+ bdaddr20pair>;
+ }
+}
+
+class CompareVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr, bits<4> type>
+ : InstVRRa<opcode, (outs), (ins tr.op:$V1, tr.op:$V2),
+ mnemonic#"\t$V1, $V2",
+ [(operator (tr.vt tr.op:$V1), (tr.vt tr.op:$V2))]> {
+ let isCompare = 1;
+ let M3 = type;
+ let M4 = 0;
+ let M5 = 0;
+}
+
+class CompareVRRaGeneric<string mnemonic, bits<16> opcode>
+ : InstVRRa<opcode, (outs), (ins VR128:$V1, VR128:$V2, imm32zx4:$M3),
+ mnemonic#"\t$V1, $V2, $M3", []> {
+ let isCompare = 1;
+ let M4 = 0;
+ let M5 = 0;
+}
+
+class CompareVRRaFloatGeneric<string mnemonic, bits<16> opcode>
+ : InstVRRa<opcode, (outs),
+ (ins VR64:$V1, VR64:$V2, imm32zx4:$M3, imm32zx4:$M4),
+ mnemonic#"\t$V1, $V2, $M3, $M4", []> {
+ let isCompare = 1;
+ let M5 = 0;
+}
+
+class TestRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls>
+ : InstRXE<opcode, (outs), (ins cls:$R1, bdxaddr12only:$XBD2),
+ mnemonic#"\t$R1, $XBD2",
+ [(operator cls:$R1, bdxaddr12only:$XBD2)]> {
+ let M3 = 0;
+}
+
+class TestRSL<string mnemonic, bits<16> opcode>
+ : InstRSLa<opcode, (outs), (ins bdladdr12onlylen4:$BDL1),
+ mnemonic#"\t$BDL1", []> {
+ let mayLoad = 1;
+}
+
+class SideEffectTernarySSc<string mnemonic, bits<8> opcode>
+ : InstSSc<opcode, (outs), (ins bdladdr12onlylen4:$BDL1,
+ shift12only:$BD2, imm32zx4:$I3),
+ mnemonic##"\t$BDL1, $BD2, $I3", []>;
+
+class SideEffectTernaryRRFa<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2,
+ RegisterOperand cls3>
+ : InstRRFa<opcode, (outs), (ins cls1:$R1, cls2:$R2, cls3:$R3),
+ mnemonic#"\t$R1, $R2, $R3", []> {
+ let M4 = 0;
+}
+
+class SideEffectTernaryRRFb<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2,
+ RegisterOperand cls3>
+ : InstRRFb<opcode, (outs), (ins cls1:$R1, cls2:$R2, cls3:$R3),
+ mnemonic#"\t$R1, $R3, $R2", []> {
+ let M4 = 0;
+}
+
+class SideEffectTernaryMemMemMemRRFb<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1,
+ RegisterOperand cls2,
+ RegisterOperand cls3>
+ : InstRRFb<opcode, (outs cls1:$R1, cls2:$R2, cls3:$R3),
+ (ins cls1:$R1src, cls2:$R2src, cls3:$R3src),
+ mnemonic#"\t$R1, $R3, $R2", []> {
+ let Constraints = "$R1 = $R1src, $R2 = $R2src, $R3 = $R3src";
+ let DisableEncoding = "$R1src, $R2src, $R3src";
+ let M4 = 0;
+}
+
+class SideEffectTernaryRRFc<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2,
+ Immediate imm>
+ : InstRRFc<opcode, (outs), (ins cls1:$R1, cls2:$R2, imm:$M3),
+ mnemonic#"\t$R1, $R2, $M3", []>;
+
+multiclass SideEffectTernaryRRFcOpt<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1,
+ RegisterOperand cls2> {
+ def "" : SideEffectTernaryRRFc<mnemonic, opcode, cls1, cls2, imm32zx4>;
+ def Opt : SideEffectBinaryRRFc<mnemonic, opcode, cls1, cls2>;
+}
+
+class SideEffectTernaryMemMemRRFc<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2,
+ Immediate imm>
+ : InstRRFc<opcode, (outs cls1:$R1, cls2:$R2),
+ (ins cls1:$R1src, cls2:$R2src, imm:$M3),
+ mnemonic#"\t$R1, $R2, $M3", []> {
+ let Constraints = "$R1 = $R1src, $R2 = $R2src";
+ let DisableEncoding = "$R1src, $R2src";
+}
+
+multiclass SideEffectTernaryMemMemRRFcOpt<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1,
+ RegisterOperand cls2> {
+ def "" : SideEffectTernaryMemMemRRFc<mnemonic, opcode, cls1, cls2, imm32zx4>;
+ def Opt : SideEffectBinaryMemMemRRFc<mnemonic, opcode, cls1, cls2>;
+}
+
+class SideEffectTernarySSF<string mnemonic, bits<12> opcode,
+ RegisterOperand cls>
+ : InstSSF<opcode, (outs),
+ (ins bdaddr12only:$BD1, bdaddr12only:$BD2, cls:$R3),
+ mnemonic#"\t$BD1, $BD2, $R3", []>;
+
+class TernaryRRFa<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2,
+ RegisterOperand cls3>
+ : InstRRFa<opcode, (outs cls1:$R1), (ins cls2:$R2, cls3:$R3, imm32zx4:$M4),
+ mnemonic#"\t$R1, $R2, $R3, $M4", []>;
+
+class TernaryRRFb<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2,
+ RegisterOperand cls3>
+ : InstRRFb<opcode, (outs cls1:$R1, cls3:$R3),
+ (ins cls1:$R1src, cls2:$R2, imm32zx4:$M4),
+ mnemonic#"\t$R1, $R3, $R2, $M4", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+class TernaryRRFe<string mnemonic, bits<16> opcode, RegisterOperand cls1,
+ RegisterOperand cls2>
+ : InstRRFe<opcode, (outs cls1:$R1),
+ (ins imm32zx4:$M3, cls2:$R2, imm32zx4:$M4),
+ mnemonic#"\t$R1, $M3, $R2, $M4", []>;
+
+class TernaryRRD<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRRD<opcode, (outs cls1:$R1), (ins cls2:$R1src, cls2:$R3, cls2:$R2),
+ mnemonic#"\t$R1, $R3, $R2",
+ [(set cls1:$R1, (operator cls2:$R1src, cls2:$R3, cls2:$R2))]> {
+ let OpKey = mnemonic#cls;
+ let OpType = "reg";
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+class TernaryRS<string mnemonic, bits<8> opcode, RegisterOperand cls,
+ bits<5> bytes, AddressingMode mode = bdaddr12only>
+ : InstRSb<opcode, (outs cls:$R1),
+ (ins cls:$R1src, imm32zx4:$M3, mode:$BD2),
+ mnemonic#"\t$R1, $M3, $BD2", []> {
+
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+}
+
+class TernaryRSY<string mnemonic, bits<16> opcode, RegisterOperand cls,
+ bits<5> bytes, AddressingMode mode = bdaddr20only>
+ : InstRSYb<opcode, (outs cls:$R1),
+ (ins cls:$R1src, imm32zx4:$M3, mode:$BD2),
+ mnemonic#"\t$R1, $M3, $BD2", []> {
+
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+}
+
+multiclass TernaryRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode,
+ RegisterOperand cls, bits<5> bytes> {
+ let DispKey = mnemonic ## #cls in {
+ let DispSize = "12" in
+ def "" : TernaryRS<mnemonic, rsOpcode, cls, bytes, bdaddr12pair>;
+ let DispSize = "20" in
+ def Y : TernaryRSY<mnemonic#"y", rsyOpcode, cls, bytes, bdaddr20pair>;
+ }
+}
+
+class SideEffectTernaryRS<string mnemonic, bits<8> opcode,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRSa<opcode, (outs),
+ (ins cls1:$R1, cls2:$R3, bdaddr12only:$BD2),
+ mnemonic#"\t$R1, $R3, $BD2", []>;
+
+class SideEffectTernaryRSY<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRSYa<opcode, (outs),
+ (ins cls1:$R1, cls2:$R3, bdaddr20only:$BD2),
+ mnemonic#"\t$R1, $R3, $BD2", []>;
+
+class SideEffectTernaryMemMemRS<string mnemonic, bits<8> opcode,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRSa<opcode, (outs cls1:$R1, cls2:$R3),
+ (ins cls1:$R1src, cls2:$R3src, shift12only:$BD2),
+ mnemonic#"\t$R1, $R3, $BD2", []> {
+ let Constraints = "$R1 = $R1src, $R3 = $R3src";
+ let DisableEncoding = "$R1src, $R3src";
+}
+
+class SideEffectTernaryMemMemRSY<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRSYa<opcode, (outs cls1:$R1, cls2:$R3),
+ (ins cls1:$R1src, cls2:$R3src, shift20only:$BD2),
+ mnemonic#"\t$R1, $R3, $BD2", []> {
+ let Constraints = "$R1 = $R1src, $R3 = $R3src";
+ let DisableEncoding = "$R1src, $R3src";
+}
+
+class TernaryRXF<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls1, RegisterOperand cls2,
+ SDPatternOperator load, bits<5> bytes>
+ : InstRXF<opcode, (outs cls1:$R1),
+ (ins cls2:$R1src, cls2:$R3, bdxaddr12only:$XBD2),
+ mnemonic#"\t$R1, $R3, $XBD2",
+ [(set cls1:$R1, (operator cls2:$R1src, cls2:$R3,
+ (load bdxaddr12only:$XBD2)))]> {
+ let OpKey = mnemonic#"r"#cls;
+ let OpType = "mem";
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+}
+
+class TernaryVRIa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, Immediate imm, Immediate index>
+ : InstVRIa<opcode, (outs tr1.op:$V1), (ins tr2.op:$V1src, imm:$I2, index:$M3),
+ mnemonic#"\t$V1, $I2, $M3",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V1src),
+ imm:$I2, index:$M3)))]> {
+ let Constraints = "$V1 = $V1src";
+ let DisableEncoding = "$V1src";
+}
+
+class TernaryVRId<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> type>
+ : InstVRId<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V2, tr2.op:$V3, imm32zx8:$I4),
+ mnemonic#"\t$V1, $V2, $V3, $I4",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
+ (tr2.vt tr2.op:$V3),
+ imm32zx8:$I4)))]> {
+ let M5 = type;
+}
+
+class TernaryVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> type, bits<4> m4or>
+ : InstVRRa<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V2, imm32zx4:$M4, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $M4, $M5",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
+ imm32zx4:$M4,
+ imm32zx4:$M5)))],
+ m4or> {
+ let M3 = type;
+}
+
+class TernaryVRRaFloatGeneric<string mnemonic, bits<16> opcode>
+ : InstVRRa<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, imm32zx4:$M3, imm32zx4:$M4, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $M3, $M4, $M5", []>;
+
+class TernaryVRRb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> type,
+ SDPatternOperator m5mask, bits<4> m5or>
+ : InstVRRb<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V2, tr2.op:$V3, m5mask:$M5),
+ mnemonic#"\t$V1, $V2, $V3, $M5",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
+ (tr2.vt tr2.op:$V3),
+ m5mask:$M5)))],
+ m5or> {
+ let M4 = type;
+}
+
+// Declare a pair of instructions, one which sets CC and one which doesn't.
+// The CC-setting form ends with "S" and sets the low bit of M5.
+// Also create aliases that make the M5 operand optional in the assembler.
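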
+multiclass TernaryOptVRRbSPair<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator,
+ SDPatternOperator operator_cc,
+ TypedReg tr1, TypedReg tr2, bits<4> type,
+ bits<4> modifier = 0> {
+ def "" : TernaryVRRb<mnemonic, opcode, operator, tr1, tr2, type,
+ imm32zx4even, !and (modifier, 14)>;
+ def : InstAlias<mnemonic#"\t$V1, $V2, $V3",
+ (!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2,
+ tr2.op:$V3, 0)>;
+ let Defs = [CC] in
+ def S : TernaryVRRb<mnemonic##"s", opcode, operator_cc, tr1, tr2, type,
+ imm32zx4even, !add(!and (modifier, 14), 1)>;
+ def : InstAlias<mnemonic#"s\t$V1, $V2, $V3",
+ (!cast<Instruction>(NAME#"S") tr1.op:$V1, tr2.op:$V2,
+ tr2.op:$V3, 0)>;
+}
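+
+// As a hypothetical example (the intrinsic and TypedReg names here are
+// assumed for illustration), an instantiation like
+//   defm VFAEB : TernaryOptVRRbSPair<"vfaeb", 0xE782, int_s390_vfaeb,
+//                                    int_s390_vfaebs, v128b, v128b, 0>;
+// would produce VFAEB and the CC-setting VFAEBS, each with an alias that
+// lets the assembler omit a zero M5 operand.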
+
+multiclass TernaryOptVRRbSPairGeneric<string mnemonic, bits<16> opcode> {
+ def "" : InstVRRb<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, VR128:$V3, imm32zx4:$M4, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $V3, $M4, $M5", []>;
+ def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $M4",
+ (!cast<Instruction>(NAME) VR128:$V1, VR128:$V2, VR128:$V3,
+ imm32zx4:$M4, 0)>;
+}
+
+class TernaryVRRc<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2>
+ : InstVRRc<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V2, tr2.op:$V3, imm32zx4:$M4),
+ mnemonic#"\t$V1, $V2, $V3, $M4",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
+ (tr2.vt tr2.op:$V3),
+ imm32zx4:$M4)))]> {
+ let M5 = 0;
+ let M6 = 0;
+}
+
+class TernaryVRRd<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> type = 0>
+ : InstVRRd<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V2, tr2.op:$V3, tr1.op:$V4),
+ mnemonic#"\t$V1, $V2, $V3, $V4",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
+ (tr2.vt tr2.op:$V3),
+ (tr1.vt tr1.op:$V4))))]> {
+ let M5 = type;
+ let M6 = 0;
+}
+
+class TernaryVRRdGeneric<string mnemonic, bits<16> opcode>
+ : InstVRRd<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, VR128:$V3, VR128:$V4, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $V3, $V4, $M5", []> {
+ let M6 = 0;
+}
+
+class TernaryVRRe<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> m5 = 0, bits<4> type = 0>
+ : InstVRRe<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V2, tr2.op:$V3, tr1.op:$V4),
+ mnemonic#"\t$V1, $V2, $V3, $V4",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
+ (tr2.vt tr2.op:$V3),
+ (tr1.vt tr1.op:$V4))))]> {
+ let M5 = m5;
+ let M6 = type;
+}
+
+class TernaryVRReFloatGeneric<string mnemonic, bits<16> opcode>
+ : InstVRRe<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, VR128:$V3, VR128:$V4, imm32zx4:$M5, imm32zx4:$M6),
+ mnemonic#"\t$V1, $V2, $V3, $V4, $M5, $M6", []>;
+
+class TernaryVRSb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, RegisterOperand cls, bits<4> type>
+ : InstVRSb<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V1src, cls:$R3, shift12only:$BD2),
+ mnemonic#"\t$V1, $R3, $BD2",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V1src),
+ cls:$R3,
+ shift12only:$BD2)))]> {
+ let Constraints = "$V1 = $V1src";
+ let DisableEncoding = "$V1src";
+ let M4 = type;
+}
+
+class TernaryVRSbGeneric<string mnemonic, bits<16> opcode>
+ : InstVRSb<opcode, (outs VR128:$V1),
+ (ins VR128:$V1src, GR64:$R3, shift12only:$BD2, imm32zx4:$M4),
+ mnemonic#"\t$V1, $R3, $BD2, $M4", []> {
+ let Constraints = "$V1 = $V1src";
+ let DisableEncoding = "$V1src";
+}
+
+class TernaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes,
+ Immediate index>
+ : InstVRV<opcode, (outs VR128:$V1),
+ (ins VR128:$V1src, bdvaddr12only:$VBD2, index:$M3),
+ mnemonic#"\t$V1, $VBD2, $M3", []> {
+ let Constraints = "$V1 = $V1src";
+ let DisableEncoding = "$V1src";
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+}
+
+class TernaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<5> bytes, Immediate index>
+ : InstVRX<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V1src, bdxaddr12only:$XBD2, index:$M3),
+ mnemonic#"\t$V1, $XBD2, $M3",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V1src),
+ bdxaddr12only:$XBD2,
+ index:$M3)))]> {
+ let Constraints = "$V1 = $V1src";
+ let DisableEncoding = "$V1src";
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+}
+
+class QuaternaryVRId<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> type>
+ : InstVRId<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V1src, tr2.op:$V2, tr2.op:$V3, imm32zx8:$I4),
+ mnemonic#"\t$V1, $V2, $V3, $I4",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V1src),
+ (tr2.vt tr2.op:$V2),
+ (tr2.vt tr2.op:$V3),
+ imm32zx8:$I4)))]> {
+ let Constraints = "$V1 = $V1src";
+ let DisableEncoding = "$V1src";
+ let M5 = type;
+}
+
+class QuaternaryVRIdGeneric<string mnemonic, bits<16> opcode>
+ : InstVRId<opcode, (outs VR128:$V1),
+ (ins VR128:$V1src, VR128:$V2, VR128:$V3,
+ imm32zx8:$I4, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $V3, $I4, $M5", []> {
+ let Constraints = "$V1 = $V1src";
+ let DisableEncoding = "$V1src";
+}
+
+class QuaternaryVRRd<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator, TypedReg tr1, TypedReg tr2,
+ bits<4> type, SDPatternOperator m6mask, bits<4> m6or>
+ : InstVRRd<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V2, tr2.op:$V3, tr2.op:$V4, m6mask:$M6),
+ mnemonic#"\t$V1, $V2, $V3, $V4, $M6",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
+ (tr2.vt tr2.op:$V3),
+ (tr2.vt tr2.op:$V4),
+ m6mask:$M6)))],
+ m6or> {
+ let M5 = type;
+}
+
+// Declare a pair of instructions, one which sets CC and one which doesn't.
+// The CC-setting form ends with "S" and sets the low bit of M6.
+// Also create aliases that make the M6 operand optional in the assembler.
+multiclass QuaternaryOptVRRdSPair<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator,
+ SDPatternOperator operator_cc,
+ TypedReg tr1, TypedReg tr2, bits<4> type,
+ bits<4> modifier = 0> {
+ def "" : QuaternaryVRRd<mnemonic, opcode, operator, tr1, tr2, type,
+ imm32zx4even, !and (modifier, 14)>;
+ def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4",
+ (!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2,
+ tr2.op:$V3, tr2.op:$V4, 0)>;
+ let Defs = [CC] in
+ def S : QuaternaryVRRd<mnemonic##"s", opcode, operator_cc, tr1, tr2, type,
+ imm32zx4even, !add (!and (modifier, 14), 1)>;
+ def : InstAlias<mnemonic#"s\t$V1, $V2, $V3, $V4",
+ (!cast<Instruction>(NAME#"S") tr1.op:$V1, tr2.op:$V2,
+ tr2.op:$V3, tr2.op:$V4, 0)>;
+}
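+
+// As a hypothetical example (names assumed for illustration), an
+// instantiation like
+//   defm VSTRCB : QuaternaryOptVRRdSPair<"vstrcb", 0xE78A, int_s390_vstrcb,
+//                                        int_s390_vstrcbs, v128b, v128b, 0>;
+// would produce VSTRCB and the CC-setting VSTRCBS in the same way.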
+
+multiclass QuaternaryOptVRRdSPairGeneric<string mnemonic, bits<16> opcode> {
+ def "" : InstVRRd<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, VR128:$V3, VR128:$V4,
+ imm32zx4:$M5, imm32zx4:$M6),
+ mnemonic#"\t$V1, $V2, $V3, $V4, $M5, $M6", []>;
+ def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4, $M5",
+ (!cast<Instruction>(NAME) VR128:$V1, VR128:$V2, VR128:$V3,
+ VR128:$V4, imm32zx4:$M5, 0)>;
+}
+
+class SideEffectQuaternaryRRFa<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2,
+ RegisterOperand cls3>
+ : InstRRFa<opcode, (outs), (ins cls1:$R1, cls2:$R2, cls3:$R3, imm32zx4:$M4),
+ mnemonic#"\t$R1, $R2, $R3, $M4", []>;
+
+multiclass SideEffectQuaternaryRRFaOptOpt<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1,
+ RegisterOperand cls2,
+ RegisterOperand cls3> {
+ def "" : SideEffectQuaternaryRRFa<mnemonic, opcode, cls1, cls2, cls3>;
+ def Opt : SideEffectTernaryRRFa<mnemonic, opcode, cls1, cls2, cls3>;
+ def OptOpt : SideEffectBinaryRRFa<mnemonic, opcode, cls1, cls2>;
+}
+
+class SideEffectQuaternaryRRFb<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2,
+ RegisterOperand cls3>
+ : InstRRFb<opcode, (outs), (ins cls1:$R1, cls2:$R2, cls3:$R3, imm32zx4:$M4),
+ mnemonic#"\t$R1, $R3, $R2, $M4", []>;
+
+multiclass SideEffectQuaternaryRRFbOpt<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1,
+ RegisterOperand cls2,
+ RegisterOperand cls3> {
+ def "" : SideEffectQuaternaryRRFb<mnemonic, opcode, cls1, cls2, cls3>;
+ def Opt : SideEffectTernaryRRFb<mnemonic, opcode, cls1, cls2, cls3>;
+}
+
+class SideEffectQuaternarySSe<string mnemonic, bits<8> opcode,
+ RegisterOperand cls>
+ : InstSSe<opcode, (outs),
+ (ins cls:$R1, bdaddr12only:$BD2, cls:$R3, bdaddr12only:$BD4),
+ mnemonic#"\t$R1, $BD2, $R3, $BD4", []>;
+
+class LoadAndOpRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls, AddressingMode mode = bdaddr20only>
+ : InstRSYa<opcode, (outs cls:$R1), (ins cls:$R3, mode:$BD2),
+ mnemonic#"\t$R1, $R3, $BD2",
+ [(set cls:$R1, (operator mode:$BD2, cls:$R3))]> {
+ let mayLoad = 1;
+ let mayStore = 1;
+}
+
+class CmpSwapRRE<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRRE<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2),
+ mnemonic#"\t$R1, $R2", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let mayLoad = 1;
+ let mayStore = 1;
+}
+
+class CmpSwapRS<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+ RegisterOperand cls, AddressingMode mode = bdaddr12only>
+ : InstRSa<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, mode:$BD2),
+ mnemonic#"\t$R1, $R3, $BD2",
+ [(set cls:$R1, (operator mode:$BD2, cls:$R1src, cls:$R3))]> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let mayLoad = 1;
+ let mayStore = 1;
+}
+
+class CmpSwapRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls, AddressingMode mode = bdaddr20only>
+ : InstRSYa<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, mode:$BD2),
+ mnemonic#"\t$R1, $R3, $BD2",
+ [(set cls:$R1, (operator mode:$BD2, cls:$R1src, cls:$R3))]> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let mayLoad = 1;
+ let mayStore = 1;
+}
+
+multiclass CmpSwapRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode,
+ SDPatternOperator operator, RegisterOperand cls> {
+ let DispKey = mnemonic ## #cls in {
+ let DispSize = "12" in
+ def "" : CmpSwapRS<mnemonic, rsOpcode, operator, cls, bdaddr12pair>;
+ let DispSize = "20" in
+ def Y : CmpSwapRSY<mnemonic#"y", rsyOpcode, operator, cls, bdaddr20pair>;
+ }
+}
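+
+// As a sketch (opcodes here are illustrative), the classic
+// compare-and-swap pair would be declared along the lines of
+//   defm CS : CmpSwapRSPair<"cs", 0xBA, 0xEB14, atomic_cmp_swap_32, GR32>;
+// giving CS for 12-bit displacements and CSY for 20-bit displacements.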
+
+class RotateSelectRIEf<string mnemonic, bits<16> opcode, RegisterOperand cls1,
+ RegisterOperand cls2>
+ : InstRIEf<opcode, (outs cls1:$R1),
+ (ins cls1:$R1src, cls2:$R2, imm32zx8:$I3, imm32zx8:$I4,
+ imm32zx6:$I5),
+ mnemonic#"\t$R1, $R2, $I3, $I4, $I5", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+class PrefetchRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator>
+ : InstRXYb<opcode, (outs), (ins imm32zx4:$M1, bdxaddr20only:$XBD2),
+ mnemonic##"\t$M1, $XBD2",
+ [(operator imm32zx4:$M1, bdxaddr20only:$XBD2)]>;
+
+class PrefetchRILPC<string mnemonic, bits<12> opcode,
+ SDPatternOperator operator>
+ : InstRILc<opcode, (outs), (ins imm32zx4:$M1, pcrel32:$RI2),
+ mnemonic##"\t$M1, $RI2",
+ [(operator imm32zx4:$M1, pcrel32:$RI2)]> {
+ // We want PC-relative addresses to be tried ahead of BD and BDX addresses.
+ // However, BDXs have two extra operands and are therefore 6 units more
+ // complex.
+ let AddedComplexity = 7;
+}
+
+class BranchPreloadSMI<string mnemonic, bits<8> opcode>
+ : InstSMI<opcode, (outs),
+ (ins imm32zx4:$M1, brtarget16bpp:$RI2, bdxaddr12only:$BD3),
+ mnemonic#"\t$M1, $RI2, $BD3", []>;
+
+class BranchPreloadMII<string mnemonic, bits<8> opcode>
+ : InstMII<opcode, (outs),
+ (ins imm32zx4:$M1, brtarget12bpp:$RI2, brtarget24bpp:$RI3),
+ mnemonic#"\t$M1, $RI2, $RI3", []>;
+
+// A floating-point load-and-test operation. Create both a normal unary
+// operation and one that acts as a comparison against zero.
+// Note that the comparison-against-zero form is not available if we
+// have vector support, since load-and-test instructions will partially
+// clobber the target (vector) register.
+multiclass LoadAndTestRRE<string mnemonic, bits<16> opcode,
+ RegisterOperand cls> {
+ def "" : UnaryRRE<mnemonic, opcode, null_frag, cls, cls>;
+ let isCodeGenOnly = 1, Predicates = [FeatureNoVector] in
+ def Compare : CompareRRE<mnemonic, opcode, null_frag, cls, cls>;
+}
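+
+// As a sketch (opcode illustrative), an instantiation such as
+//   defm LTEBR : LoadAndTestRRE<"lteb", 0xB302, FP32>;
+// would define LTEBR plus, on targets without vector support, a
+// codegen-only LTEBRCompare form usable as a comparison against zero.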
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions
+//===----------------------------------------------------------------------===//
+//
+// Convenience instructions that get lowered to real instructions
+// by either SystemZTargetLowering::EmitInstrWithCustomInserter()
+// or SystemZInstrInfo::expandPostRAPseudo().
+//
+//===----------------------------------------------------------------------===//
+
+class Pseudo<dag outs, dag ins, list<dag> pattern>
+ : InstSystemZ<0, outs, ins, "", pattern> {
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+}
+
+// Like SideEffectBinarySIL, but expanded later.
+class SideEffectBinarySILPseudo<SDPatternOperator operator, Immediate imm>
+ : Pseudo<(outs), (ins bdaddr12only:$BD1, imm:$I2),
+ [(operator bdaddr12only:$BD1, imm:$I2)]>;
+
+// Like UnaryRI, but expanded after RA depending on the choice of register.
+class UnaryRIPseudo<SDPatternOperator operator, RegisterOperand cls,
+ Immediate imm>
+ : Pseudo<(outs cls:$R1), (ins imm:$I2),
+ [(set cls:$R1, (operator imm:$I2))]>;
+
+// Like UnaryRXY, but expanded after RA depending on the choice of register.
+class UnaryRXYPseudo<string key, SDPatternOperator operator,
+ RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode = bdxaddr20only>
+ : Pseudo<(outs cls:$R1), (ins mode:$XBD2),
+ [(set cls:$R1, (operator mode:$XBD2))]> {
+ let OpKey = key#"r"#cls;
+ let OpType = "mem";
+ let mayLoad = 1;
+ let Has20BitOffset = 1;
+ let HasIndex = 1;
+ let AccessBytes = bytes;
+}
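+
+// As a sketch (names assumed for illustration), a register-bank-agnostic
+// load could be declared as
+//   def LMux : UnaryRXYPseudo<"l", load, GRX32, 4>,
+//              Requires<[FeatureHighWord]>;
+// and rewritten after RA to L/LY (low register) or LFH (high register).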
+
+// Like UnaryRR, but expanded after RA depending on the choice of registers.
+class UnaryRRPseudo<string key, SDPatternOperator operator,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : Pseudo<(outs cls1:$R1), (ins cls2:$R2),
+ [(set cls1:$R1, (operator cls2:$R2))]> {
+ let OpKey = key#cls1;
+ let OpType = "reg";
+}
+
+// Like BinaryRI, but expanded after RA depending on the choice of register.
+class BinaryRIPseudo<SDPatternOperator operator, RegisterOperand cls,
+ Immediate imm>
+ : Pseudo<(outs cls:$R1), (ins cls:$R1src, imm:$I2),
+ [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
+ let Constraints = "$R1 = $R1src";
+}
+
+// Like BinaryRIE, but expanded after RA depending on the choice of register.
+class BinaryRIEPseudo<SDPatternOperator operator, RegisterOperand cls,
+ Immediate imm>
+ : Pseudo<(outs cls:$R1), (ins cls:$R3, imm:$I2),
+ [(set cls:$R1, (operator cls:$R3, imm:$I2))]>;
+
+// Like BinaryRIAndK, but expanded after RA depending on the choice of register.
+multiclass BinaryRIAndKPseudo<string key, SDPatternOperator operator,
+ RegisterOperand cls, Immediate imm> {
+ let NumOpsKey = key in {
+ let NumOpsValue = "3" in
+ def K : BinaryRIEPseudo<null_frag, cls, imm>,
+ Requires<[FeatureHighWord, FeatureDistinctOps]>;
+ let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in
+ def "" : BinaryRIPseudo<operator, cls, imm>,
+ Requires<[FeatureHighWord]>;
+ }
+}
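+
+// As a sketch (names assumed for illustration), an instantiation such as
+//   defm AHIMux : BinaryRIAndKPseudo<"ahimux", add, GRX32, imm32sx16>;
+// would give a two-operand AHIMux plus a three-operand AHIMuxK that is
+// preferred when the distinct-operands facility is available.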
+
+// Like CompareRI, but expanded after RA depending on the choice of register.
+class CompareRIPseudo<SDPatternOperator operator, RegisterOperand cls,
+ Immediate imm>
+ : Pseudo<(outs), (ins cls:$R1, imm:$I2), [(operator cls:$R1, imm:$I2)]> {
+ let isCompare = 1;
+}
+
+// Like CompareRXY, but expanded after RA depending on the choice of register.
+class CompareRXYPseudo<SDPatternOperator operator, RegisterOperand cls,
+ SDPatternOperator load, bits<5> bytes,
+ AddressingMode mode = bdxaddr20only>
+ : Pseudo<(outs), (ins cls:$R1, mode:$XBD2),
+ [(operator cls:$R1, (load mode:$XBD2))]> {
+ let mayLoad = 1;
+ let Has20BitOffset = 1;
+ let HasIndex = 1;
+ let AccessBytes = bytes;
+}
+
+// Like CondBinaryRRF, but expanded after RA depending on the choice of
+// register.
+class CondBinaryRRFPseudo<RegisterOperand cls1, RegisterOperand cls2>
+ : Pseudo<(outs cls1:$R1),
+ (ins cls1:$R1src, cls2:$R2, cond4:$valid, cond4:$M3), []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let CCMaskLast = 1;
+}
+
+// Like CondBinaryRIE, but expanded after RA depending on the choice of
+// register.
+class CondBinaryRIEPseudo<RegisterOperand cls, Immediate imm>
+ : Pseudo<(outs cls:$R1),
+ (ins cls:$R1src, imm:$I2, cond4:$valid, cond4:$M3),
+ [(set cls:$R1, (z_select_ccmask imm:$I2, cls:$R1src,
+ cond4:$valid, cond4:$M3))]> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let CCMaskLast = 1;
+}
+
+// Like CondUnaryRSY, but expanded after RA depending on the choice of
+// register.
+class CondUnaryRSYPseudo<SDPatternOperator operator, RegisterOperand cls,
+ bits<5> bytes, AddressingMode mode = bdaddr20only>
+ : Pseudo<(outs cls:$R1),
+ (ins cls:$R1src, mode:$BD2, cond4:$valid, cond4:$R3),
+ [(set cls:$R1,
+ (z_select_ccmask (operator mode:$BD2), cls:$R1src,
+ cond4:$valid, cond4:$R3))]> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+ let CCMaskLast = 1;
+}
+
+// Like CondStoreRSY, but expanded after RA depending on the choice of
+// register.
+class CondStoreRSYPseudo<RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode = bdaddr20only>
+ : Pseudo<(outs), (ins cls:$R1, mode:$BD2, cond4:$valid, cond4:$R3), []> {
+ let mayStore = 1;
+ let AccessBytes = bytes;
+ let CCMaskLast = 1;
+}
+
+// Like StoreRXY, but expanded after RA depending on the choice of register.
+class StoreRXYPseudo<SDPatternOperator operator, RegisterOperand cls,
+ bits<5> bytes, AddressingMode mode = bdxaddr20only>
+ : Pseudo<(outs), (ins cls:$R1, mode:$XBD2),
+ [(operator cls:$R1, mode:$XBD2)]> {
+ let mayStore = 1;
+ let Has20BitOffset = 1;
+ let HasIndex = 1;
+ let AccessBytes = bytes;
+}
+
+// Like RotateSelectRIEf, but expanded after RA depending on the choice
+// of registers.
+class RotateSelectRIEfPseudo<RegisterOperand cls1, RegisterOperand cls2>
+ : Pseudo<(outs cls1:$R1),
+ (ins cls1:$R1src, cls2:$R2, imm32zx8:$I3, imm32zx8:$I4,
+ imm32zx6:$I5),
+ []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+// Implements "$dst = $cc & (8 >> CC) ? $src1 : $src2", where CC is
+// the value of the PSW's 2-bit condition code field.
+class SelectWrapper<RegisterOperand cls>
+ : Pseudo<(outs cls:$dst),
+ (ins cls:$src1, cls:$src2, imm32zx4:$valid, imm32zx4:$cc),
+ [(set cls:$dst, (z_select_ccmask cls:$src1, cls:$src2,
+ imm32zx4:$valid, imm32zx4:$cc))]> {
+ let usesCustomInserter = 1;
+ // Although the instructions used by these nodes do not in themselves
+ // change CC, the insertion requires new blocks, and CC cannot be live
+ // across them.
+ let Defs = [CC];
+ let Uses = [CC];
+}
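+
+// As a sketch, typical instantiations would be
+//   def Select32 : SelectWrapper<GR32>;
+//   def Select64 : SelectWrapper<GR64>;
+// each expanded by the custom inserter into a branch diamond that picks
+// $src1 or $src2 according to the CC test.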
+
+// Stores $new to $addr if $cc is true ("" case) or false (Inv case).
+multiclass CondStores<RegisterOperand cls, SDPatternOperator store,
+ SDPatternOperator load, AddressingMode mode> {
+ let Defs = [CC], Uses = [CC], usesCustomInserter = 1 in {
+ def "" : Pseudo<(outs),
+ (ins cls:$new, mode:$addr, imm32zx4:$valid, imm32zx4:$cc),
+ [(store (z_select_ccmask cls:$new, (load mode:$addr),
+ imm32zx4:$valid, imm32zx4:$cc),
+ mode:$addr)]>;
+ def Inv : Pseudo<(outs),
+ (ins cls:$new, mode:$addr, imm32zx4:$valid, imm32zx4:$cc),
+ [(store (z_select_ccmask (load mode:$addr), cls:$new,
+ imm32zx4:$valid, imm32zx4:$cc),
+ mode:$addr)]>;
+ }
+}
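+
+// As a hypothetical example (the store/load fragment names are assumed
+// for illustration), an instantiation like
+//   defm CondStore32 : CondStores<GR32, nonvolatile_truncstorei32,
+//                                 nonvolatile_anyextloadi32, bdxaddr20only>;
+// would define CondStore32 and CondStore32Inv for the two CC senses.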
+
+// OPERATOR is ATOMIC_SWAP or an ATOMIC_LOAD_* operation. PAT and OPERAND
+// describe the second (non-memory) operand.
+class AtomicLoadBinary<SDPatternOperator operator, RegisterOperand cls,
+ dag pat, DAGOperand operand>
+ : Pseudo<(outs cls:$dst), (ins bdaddr20only:$ptr, operand:$src2),
+ [(set cls:$dst, (operator bdaddr20only:$ptr, pat))]> {
+ let Defs = [CC];
+ let Has20BitOffset = 1;
+ let mayLoad = 1;
+ let mayStore = 1;
+ let usesCustomInserter = 1;
+ let hasNoSchedulingInfo = 1;
+}
+
+// Specializations of AtomicLoadBinary.
+class AtomicLoadBinaryReg32<SDPatternOperator operator>
+ : AtomicLoadBinary<operator, GR32, (i32 GR32:$src2), GR32>;
+class AtomicLoadBinaryImm32<SDPatternOperator operator, Immediate imm>
+ : AtomicLoadBinary<operator, GR32, (i32 imm:$src2), imm>;
+class AtomicLoadBinaryReg64<SDPatternOperator operator>
+ : AtomicLoadBinary<operator, GR64, (i64 GR64:$src2), GR64>;
+class AtomicLoadBinaryImm64<SDPatternOperator operator, Immediate imm>
+ : AtomicLoadBinary<operator, GR64, (i64 imm:$src2), imm>;
+
+// OPERATOR is ATOMIC_SWAPW or an ATOMIC_LOADW_* operation. PAT and OPERAND
+// describe the second (non-memory) operand.
+class AtomicLoadWBinary<SDPatternOperator operator, dag pat,
+ DAGOperand operand>
+ : Pseudo<(outs GR32:$dst),
+ (ins bdaddr20only:$ptr, operand:$src2, ADDR32:$bitshift,
+ ADDR32:$negbitshift, uimm32:$bitsize),
+ [(set GR32:$dst, (operator bdaddr20only:$ptr, pat, ADDR32:$bitshift,
+ ADDR32:$negbitshift, uimm32:$bitsize))]> {
+ let Defs = [CC];
+ let Has20BitOffset = 1;
+ let mayLoad = 1;
+ let mayStore = 1;
+ let usesCustomInserter = 1;
+ let hasNoSchedulingInfo = 1;
+}
+
+// Specializations of AtomicLoadWBinary.
+class AtomicLoadWBinaryReg<SDPatternOperator operator>
+ : AtomicLoadWBinary<operator, (i32 GR32:$src2), GR32>;
+class AtomicLoadWBinaryImm<SDPatternOperator operator, Immediate imm>
+ : AtomicLoadWBinary<operator, (i32 imm:$src2), imm>;
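+
+// As a sketch:
+//   def ATOMIC_SWAPW : AtomicLoadWBinaryReg<z_atomic_swapw>;
+// The extra shift and size operands let the expansion address the aligned
+// 32-bit word that contains the narrower memory operand.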
+
+// Define an instruction that operates on two fixed-length blocks of memory,
+// and associated pseudo instructions for operating on blocks of any size.
+// The Sequence form uses a straight-line sequence of instructions and
+// the Loop form uses a loop of 256-byte operations followed by one more
+// operation to handle any remaining bytes.
+multiclass MemorySS<string mnemonic, bits<8> opcode,
+ SDPatternOperator sequence, SDPatternOperator loop> {
+ def "" : SideEffectBinarySSa<mnemonic, opcode>;
+ let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
+ def Sequence : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length),
+ [(sequence bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length)]>;
+ def Loop : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length, GR64:$count256),
+ [(loop bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length, GR64:$count256)]>;
+ }
+}
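+
+// As a sketch (opcode illustrative), block moves would be declared as
+//   defm MVC : MemorySS<"mvc", 0xD2, z_mvc, z_mvc_loop>;
+// yielding the real MVC plus MVCSequence and MVCLoop pseudos for
+// arbitrary lengths.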
+
+// Define an instruction that operates on two strings, both terminated
+// by the character in R0. The instruction processes a CPU-determined
+// number of bytes at a time and sets CC to 3 if the instruction needs
+// to be repeated. Also define a pseudo instruction that represents
+// the full loop (the main instruction plus the branch on CC==3).
+multiclass StringRRE<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator> {
+ let Uses = [R0L] in
+ def "" : SideEffectBinaryMemMemRRE<mnemonic, opcode, GR64, GR64>;
+ let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in
+ def Loop : Pseudo<(outs GR64:$end),
+ (ins GR64:$start1, GR64:$start2, GR32:$char),
+ [(set GR64:$end, (operator GR64:$start1, GR64:$start2,
+ GR32:$char))]>;
+}
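+
+// As a sketch (opcode illustrative), string comparison would be declared as
+//   defm CLST : StringRRE<"clst", 0xB25D, z_strcmp>;
+// yielding CLST plus a CLSTLoop pseudo that covers the branch on CC==3.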
+
+// A pseudo instruction that is a direct alias of a real instruction.
+// These aliases are used in cases where a particular register operand is
+// fixed or where the same instruction is used with different register sizes.
+// The size parameter is the size in bytes of the associated real instruction.
+class Alias<int size, dag outs, dag ins, list<dag> pattern>
+ : InstSystemZ<size, outs, ins, "", pattern> {
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+}
+
+class UnaryAliasVRS<RegisterOperand cls1, RegisterOperand cls2>
+ : Alias<6, (outs cls1:$src1), (ins cls2:$src2), []>;
+
+// An alias of a UnaryVRR*, but with different register sizes.
+class UnaryAliasVRR<SDPatternOperator operator, TypedReg tr1, TypedReg tr2>
+ : Alias<6, (outs tr1.op:$V1), (ins tr2.op:$V2),
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2))))]>;
+
+// An alias of a UnaryVRX, but with different register sizes.
+class UnaryAliasVRX<SDPatternOperator operator, TypedReg tr,
+ AddressingMode mode = bdxaddr12only>
+ : Alias<6, (outs tr.op:$V1), (ins mode:$XBD2),
+ [(set tr.op:$V1, (tr.vt (operator mode:$XBD2)))]>;
+
+// An alias of a StoreVRX, but with different register sizes.
+class StoreAliasVRX<SDPatternOperator operator, TypedReg tr,
+ AddressingMode mode = bdxaddr12only>
+ : Alias<6, (outs), (ins tr.op:$V1, mode:$XBD2),
+ [(operator (tr.vt tr.op:$V1), mode:$XBD2)]>;
+
+// An alias of a BinaryRI, but with different register sizes.
+class BinaryAliasRI<SDPatternOperator operator, RegisterOperand cls,
+ Immediate imm>
+ : Alias<4, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
+ [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
+ let Constraints = "$R1 = $R1src";
+}
+
+// An alias of a BinaryRIL, but with different register sizes.
+class BinaryAliasRIL<SDPatternOperator operator, RegisterOperand cls,
+ Immediate imm>
+ : Alias<6, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
+ [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
+ let Constraints = "$R1 = $R1src";
+}
+
+// An alias of a BinaryVRRf, but with different register sizes.
+class BinaryAliasVRRf<RegisterOperand cls>
+ : Alias<6, (outs VR128:$V1), (ins cls:$R2, cls:$R3), []>;
+
+// An alias of a CompareRI, but with different register sizes.
+class CompareAliasRI<SDPatternOperator operator, RegisterOperand cls,
+ Immediate imm>
+ : Alias<4, (outs), (ins cls:$R1, imm:$I2), [(operator cls:$R1, imm:$I2)]> {
+ let isCompare = 1;
+}
+
+// An alias of a RotateSelectRIEf, but with different register sizes.
+class RotateSelectAliasRIEf<RegisterOperand cls1, RegisterOperand cls2>
+ : Alias<6, (outs cls1:$R1),
+ (ins cls1:$R1src, cls2:$R2, imm32zx8:$I3, imm32zx8:$I4,
+ imm32zx6:$I5), []> {
+ let Constraints = "$R1 = $R1src";
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrHFP.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrHFP.td
new file mode 100644
index 000000000000..6d5b4b92f650
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrHFP.td
@@ -0,0 +1,240 @@
+//==- SystemZInstrHFP.td - Floating-point SystemZ instructions -*- tblgen-*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The instructions in this file implement SystemZ hexadecimal floating-point
+// arithmetic. Since this format is not mapped to any source-language data
+// type, these instructions are not used for code generation, but are provided
+// for use with the assembler and disassembler only.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load and test.
+let Defs = [CC] in {
+ def LTER : UnaryRR <"lter", 0x32, null_frag, FP32, FP32>;
+ def LTDR : UnaryRR <"ltdr", 0x22, null_frag, FP64, FP64>;
+ def LTXR : UnaryRRE<"ltxr", 0xB362, null_frag, FP128, FP128>;
+}
+
+//===----------------------------------------------------------------------===//
+// Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Convert floating-point values to narrower representations.
+def LEDR : UnaryRR <"ledr", 0x35, null_frag, FP32, FP64>;
+def LEXR : UnaryRRE<"lexr", 0xB366, null_frag, FP32, FP128>;
+def LDXR : UnaryRR <"ldxr", 0x25, null_frag, FP64, FP128>;
+let isAsmParserOnly = 1 in {
+ def LRER : UnaryRR <"lrer", 0x35, null_frag, FP32, FP64>;
+ def LRDR : UnaryRR <"lrdr", 0x25, null_frag, FP64, FP128>;
+}
+
+// Extend floating-point values to wider representations.
+def LDER : UnaryRRE<"lder", 0xB324, null_frag, FP64, FP32>;
+def LXER : UnaryRRE<"lxer", 0xB326, null_frag, FP128, FP32>;
+def LXDR : UnaryRRE<"lxdr", 0xB325, null_frag, FP128, FP64>;
+
+def LDE : UnaryRXE<"lde", 0xED24, null_frag, FP64, 4>;
+def LXE : UnaryRXE<"lxe", 0xED26, null_frag, FP128, 4>;
+def LXD : UnaryRXE<"lxd", 0xED25, null_frag, FP128, 8>;
+
+// Convert a signed integer register value to a floating-point one.
+def CEFR : UnaryRRE<"cefr", 0xB3B4, null_frag, FP32, GR32>;
+def CDFR : UnaryRRE<"cdfr", 0xB3B5, null_frag, FP64, GR32>;
+def CXFR : UnaryRRE<"cxfr", 0xB3B6, null_frag, FP128, GR32>;
+
+def CEGR : UnaryRRE<"cegr", 0xB3C4, null_frag, FP32, GR64>;
+def CDGR : UnaryRRE<"cdgr", 0xB3C5, null_frag, FP64, GR64>;
+def CXGR : UnaryRRE<"cxgr", 0xB3C6, null_frag, FP128, GR64>;
+
+// Convert a floating-point register value to a signed integer value,
+// with the second operand (modifier M3) specifying the rounding mode.
+let Defs = [CC] in {
+ def CFER : BinaryRRFe<"cfer", 0xB3B8, GR32, FP32>;
+ def CFDR : BinaryRRFe<"cfdr", 0xB3B9, GR32, FP64>;
+ def CFXR : BinaryRRFe<"cfxr", 0xB3BA, GR32, FP128>;
+
+ def CGER : BinaryRRFe<"cger", 0xB3C8, GR64, FP32>;
+ def CGDR : BinaryRRFe<"cgdr", 0xB3C9, GR64, FP64>;
+ def CGXR : BinaryRRFe<"cgxr", 0xB3CA, GR64, FP128>;
+}
+
+// Convert BFP to HFP.
+let Defs = [CC] in {
+ def THDER : UnaryRRE<"thder", 0xB358, null_frag, FP64, FP32>;
+ def THDR : UnaryRRE<"thdr", 0xB359, null_frag, FP64, FP64>;
+}
+
+// Convert HFP to BFP.
+let Defs = [CC] in {
+ def TBEDR : BinaryRRFe<"tbedr", 0xB350, FP32, FP64>;
+ def TBDR : BinaryRRFe<"tbdr", 0xB351, FP64, FP64>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Negation (Load Complement).
+let Defs = [CC] in {
+ def LCER : UnaryRR <"lcer", 0x33, null_frag, FP32, FP32>;
+ def LCDR : UnaryRR <"lcdr", 0x23, null_frag, FP64, FP64>;
+ def LCXR : UnaryRRE<"lcxr", 0xB363, null_frag, FP128, FP128>;
+}
+
+// Absolute value (Load Positive).
+let Defs = [CC] in {
+ def LPER : UnaryRR <"lper", 0x30, null_frag, FP32, FP32>;
+ def LPDR : UnaryRR <"lpdr", 0x20, null_frag, FP64, FP64>;
+ def LPXR : UnaryRRE<"lpxr", 0xB360, null_frag, FP128, FP128>;
+}
+
+// Negative absolute value (Load Negative).
+let Defs = [CC] in {
+ def LNER : UnaryRR <"lner", 0x31, null_frag, FP32, FP32>;
+ def LNDR : UnaryRR <"lndr", 0x21, null_frag, FP64, FP64>;
+ def LNXR : UnaryRRE<"lnxr", 0xB361, null_frag, FP128, FP128>;
+}
+
+// Halve.
+def HER : UnaryRR <"her", 0x34, null_frag, FP32, FP32>;
+def HDR : UnaryRR <"hdr", 0x24, null_frag, FP64, FP64>;
+
+// Square root.
+def SQER : UnaryRRE<"sqer", 0xB245, null_frag, FP32, FP32>;
+def SQDR : UnaryRRE<"sqdr", 0xB244, null_frag, FP64, FP64>;
+def SQXR : UnaryRRE<"sqxr", 0xB336, null_frag, FP128, FP128>;
+
+def SQE : UnaryRXE<"sqe", 0xED34, null_frag, FP32, 4>;
+def SQD : UnaryRXE<"sqd", 0xED35, null_frag, FP64, 8>;
+
+// Round to an integer (rounding towards zero).
+def FIER : UnaryRRE<"fier", 0xB377, null_frag, FP32, FP32>;
+def FIDR : UnaryRRE<"fidr", 0xB37F, null_frag, FP64, FP64>;
+def FIXR : UnaryRRE<"fixr", 0xB367, null_frag, FP128, FP128>;
+
+
+//===----------------------------------------------------------------------===//
+// Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition.
+let Defs = [CC] in {
+ let isCommutable = 1 in {
+ def AER : BinaryRR<"aer", 0x3A, null_frag, FP32, FP32>;
+ def ADR : BinaryRR<"adr", 0x2A, null_frag, FP64, FP64>;
+ def AXR : BinaryRR<"axr", 0x36, null_frag, FP128, FP128>;
+ }
+ def AE : BinaryRX<"ae", 0x7A, null_frag, FP32, load, 4>;
+ def AD : BinaryRX<"ad", 0x6A, null_frag, FP64, load, 8>;
+}
+
+// Addition (unnormalized).
+let Defs = [CC] in {
+ let isCommutable = 1 in {
+ def AUR : BinaryRR<"aur", 0x3E, null_frag, FP32, FP32>;
+ def AWR : BinaryRR<"awr", 0x2E, null_frag, FP64, FP64>;
+ }
+ def AU : BinaryRX<"au", 0x7E, null_frag, FP32, load, 4>;
+ def AW : BinaryRX<"aw", 0x6E, null_frag, FP64, load, 8>;
+}
+
+// Subtraction.
+let Defs = [CC] in {
+ def SER : BinaryRR<"ser", 0x3B, null_frag, FP32, FP32>;
+ def SDR : BinaryRR<"sdr", 0x2B, null_frag, FP64, FP64>;
+ def SXR : BinaryRR<"sxr", 0x37, null_frag, FP128, FP128>;
+
+ def SE : BinaryRX<"se", 0x7B, null_frag, FP32, load, 4>;
+ def SD : BinaryRX<"sd", 0x6B, null_frag, FP64, load, 8>;
+}
+
+// Subtraction (unnormalized).
+let Defs = [CC] in {
+ def SUR : BinaryRR<"sur", 0x3F, null_frag, FP32, FP32>;
+ def SWR : BinaryRR<"swr", 0x2F, null_frag, FP64, FP64>;
+
+ def SU : BinaryRX<"su", 0x7F, null_frag, FP32, load, 4>;
+ def SW : BinaryRX<"sw", 0x6F, null_frag, FP64, load, 8>;
+}
+
+// Multiplication.
+let isCommutable = 1 in {
+ def MEER : BinaryRRE<"meer", 0xB337, null_frag, FP32, FP32>;
+ def MDR : BinaryRR <"mdr", 0x2C, null_frag, FP64, FP64>;
+ def MXR : BinaryRR <"mxr", 0x26, null_frag, FP128, FP128>;
+}
+def MEE : BinaryRXE<"mee", 0xED37, null_frag, FP32, load, 4>;
+def MD : BinaryRX <"md", 0x6C, null_frag, FP64, load, 8>;
+
+// Extending multiplication (f32 x f32 -> f64).
+def MDER : BinaryRR<"mder", 0x3C, null_frag, FP64, FP32>;
+def MDE : BinaryRX<"mde", 0x7C, null_frag, FP64, load, 4>;
+let isAsmParserOnly = 1 in {
+ def MER : BinaryRR<"mer", 0x3C, null_frag, FP64, FP32>;
+ def ME : BinaryRX<"me", 0x7C, null_frag, FP64, load, 4>;
+}
+
+// Extending multiplication (f64 x f64 -> f128).
+def MXDR : BinaryRR<"mxdr", 0x27, null_frag, FP128, FP64>;
+def MXD : BinaryRX<"mxd", 0x67, null_frag, FP128, load, 8>;
+
+// Fused multiply-add.
+def MAER : TernaryRRD<"maer", 0xB32E, null_frag, FP32, FP32>;
+def MADR : TernaryRRD<"madr", 0xB33E, null_frag, FP64, FP64>;
+def MAE : TernaryRXF<"mae", 0xED2E, null_frag, FP32, FP32, load, 4>;
+def MAD : TernaryRXF<"mad", 0xED3E, null_frag, FP64, FP64, load, 8>;
+
+// Fused multiply-subtract.
+def MSER : TernaryRRD<"mser", 0xB32F, null_frag, FP32, FP32>;
+def MSDR : TernaryRRD<"msdr", 0xB33F, null_frag, FP64, FP64>;
+def MSE : TernaryRXF<"mse", 0xED2F, null_frag, FP32, FP32, load, 4>;
+def MSD : TernaryRXF<"msd", 0xED3F, null_frag, FP64, FP64, load, 8>;
+
+// Multiplication (unnormalized).
+def MYR : BinaryRRD<"myr", 0xB33B, null_frag, FP128, FP64>;
+def MYHR : BinaryRRD<"myhr", 0xB33D, null_frag, FP64, FP64>;
+def MYLR : BinaryRRD<"mylr", 0xB339, null_frag, FP64, FP64>;
+def MY : BinaryRXF<"my", 0xED3B, null_frag, FP128, FP64, load, 8>;
+def MYH : BinaryRXF<"myh", 0xED3D, null_frag, FP64, FP64, load, 8>;
+def MYL : BinaryRXF<"myl", 0xED39, null_frag, FP64, FP64, load, 8>;
+
+// Fused multiply-add (unnormalized).
+def MAYR : TernaryRRD<"mayr", 0xB33A, null_frag, FP128, FP64>;
+def MAYHR : TernaryRRD<"mayhr", 0xB33C, null_frag, FP64, FP64>;
+def MAYLR : TernaryRRD<"maylr", 0xB338, null_frag, FP64, FP64>;
+def MAY : TernaryRXF<"may", 0xED3A, null_frag, FP128, FP64, load, 8>;
+def MAYH : TernaryRXF<"mayh", 0xED3C, null_frag, FP64, FP64, load, 8>;
+def MAYL : TernaryRXF<"mayl", 0xED38, null_frag, FP64, FP64, load, 8>;
+
+// Division.
+def DER : BinaryRR <"der", 0x3D, null_frag, FP32, FP32>;
+def DDR : BinaryRR <"ddr", 0x2D, null_frag, FP64, FP64>;
+def DXR : BinaryRRE<"dxr", 0xB22D, null_frag, FP128, FP128>;
+def DE : BinaryRX <"de", 0x7D, null_frag, FP32, load, 4>;
+def DD : BinaryRX <"dd", 0x6D, null_frag, FP64, load, 8>;
+
+
+//===----------------------------------------------------------------------===//
+// Comparisons
+//===----------------------------------------------------------------------===//
+
+let Defs = [CC] in {
+ def CER : CompareRR <"cer", 0x39, null_frag, FP32, FP32>;
+ def CDR : CompareRR <"cdr", 0x29, null_frag, FP64, FP64>;
+ def CXR : CompareRRE<"cxr", 0xB369, null_frag, FP128, FP128>;
+
+ def CE : CompareRX<"ce", 0x79, null_frag, FP32, load, 4>;
+ def CD : CompareRX<"cd", 0x69, null_frag, FP64, load, 8>;
+}
+
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
new file mode 100644
index 000000000000..66a5ff12be46
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -0,0 +1,1807 @@
+//===-- SystemZInstrInfo.cpp - SystemZ instruction information ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SystemZ implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZInstrInfo.h"
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "SystemZ.h"
+#include "SystemZInstrBuilder.h"
+#include "SystemZSubtarget.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+
+using namespace llvm;
+
+#define GET_INSTRINFO_CTOR_DTOR
+#define GET_INSTRMAP_INFO
+#include "SystemZGenInstrInfo.inc"
+
+// Return a mask with Count low bits set.
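+// Shifting twice, rather than by Count directly, avoids undefined
+// behavior when Count is 64.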
+static uint64_t allOnes(unsigned int Count) {
+ return Count == 0 ? 0 : (uint64_t(1) << (Count - 1) << 1) - 1;
+}
+
+// Reg should be a 32-bit GPR. Return true if it is a high register rather
+// than a low register.
+static bool isHighReg(unsigned int Reg) {
+ if (SystemZ::GRH32BitRegClass.contains(Reg))
+ return true;
+ assert(SystemZ::GR32BitRegClass.contains(Reg) && "Invalid GRX32");
+ return false;
+}
+
+// Pin the vtable to this file.
+void SystemZInstrInfo::anchor() {}
+
+SystemZInstrInfo::SystemZInstrInfo(SystemZSubtarget &sti)
+ : SystemZGenInstrInfo(SystemZ::ADJCALLSTACKDOWN, SystemZ::ADJCALLSTACKUP),
+ RI(), STI(sti) {
+}
+
+// MI is a 128-bit load or store. Split it into two 64-bit loads or stores,
+// each having the opcode given by NewOpcode.
+void SystemZInstrInfo::splitMove(MachineBasicBlock::iterator MI,
+ unsigned NewOpcode) const {
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineFunction &MF = *MBB->getParent();
+
+ // Get two load or store instructions. Use the original instruction for one
+ // of them (arbitrarily the second here) and create a clone for the other.
+ MachineInstr *EarlierMI = MF.CloneMachineInstr(&*MI);
+ MBB->insert(MI, EarlierMI);
+
+ // Set up the two 64-bit registers and remember super reg and its flags.
+ MachineOperand &HighRegOp = EarlierMI->getOperand(0);
+ MachineOperand &LowRegOp = MI->getOperand(0);
+ unsigned Reg128 = LowRegOp.getReg();
+ unsigned Reg128Killed = getKillRegState(LowRegOp.isKill());
+ unsigned Reg128Undef = getUndefRegState(LowRegOp.isUndef());
+ HighRegOp.setReg(RI.getSubReg(HighRegOp.getReg(), SystemZ::subreg_h64));
+ LowRegOp.setReg(RI.getSubReg(LowRegOp.getReg(), SystemZ::subreg_l64));
+
+ if (MI->mayStore()) {
+ // Add implicit uses of the super register in case one of the subregs is
+ // undefined. We could track liveness and skip storing an undefined
+ // subreg, but this is hopefully rare (discovered with llvm-stress).
+ // If Reg128 was killed, set kill flag on MI.
+ unsigned Reg128UndefImpl = (Reg128Undef | RegState::Implicit);
+ MachineInstrBuilder(MF, EarlierMI).addReg(Reg128, Reg128UndefImpl);
+ MachineInstrBuilder(MF, MI).addReg(Reg128, (Reg128UndefImpl | Reg128Killed));
+ }
+
+ // The address in the first (high) instruction is already correct.
+ // Adjust the offset in the second (low) instruction.
+ MachineOperand &HighOffsetOp = EarlierMI->getOperand(2);
+ MachineOperand &LowOffsetOp = MI->getOperand(2);
+ LowOffsetOp.setImm(LowOffsetOp.getImm() + 8);
+
+ // Clear the kill flags on the registers in the first instruction.
+ if (EarlierMI->getOperand(0).isReg() && EarlierMI->getOperand(0).isUse())
+ EarlierMI->getOperand(0).setIsKill(false);
+ EarlierMI->getOperand(1).setIsKill(false);
+ EarlierMI->getOperand(3).setIsKill(false);
+
+ // Set the opcodes.
+ unsigned HighOpcode = getOpcodeForOffset(NewOpcode, HighOffsetOp.getImm());
+ unsigned LowOpcode = getOpcodeForOffset(NewOpcode, LowOffsetOp.getImm());
+ assert(HighOpcode && LowOpcode && "Both offsets should be in range");
+
+ EarlierMI->setDesc(get(HighOpcode));
+ MI->setDesc(get(LowOpcode));
+}
+
+// Split ADJDYNALLOC instruction MI.
+void SystemZInstrInfo::splitAdjDynAlloc(MachineBasicBlock::iterator MI) const {
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineFunction &MF = *MBB->getParent();
+ MachineFrameInfo &MFFrame = MF.getFrameInfo();
+ MachineOperand &OffsetMO = MI->getOperand(2);
+
+ uint64_t Offset = (MFFrame.getMaxCallFrameSize() +
+ SystemZMC::CallFrameSize +
+ OffsetMO.getImm());
+ unsigned NewOpcode = getOpcodeForOffset(SystemZ::LA, Offset);
+ assert(NewOpcode && "No support for huge argument lists yet");
+ MI->setDesc(get(NewOpcode));
+ OffsetMO.setImm(Offset);
+}
+
+// MI is an RI-style pseudo instruction. Replace it with LowOpcode
+// if the first operand is a low GR32 and HighOpcode if the first operand
+// is a high GR32. ConvertHigh is true if LowOpcode takes a signed operand
+// and HighOpcode takes an unsigned 32-bit operand. In those cases,
+// MI has the same kind of operand as LowOpcode, so needs to be converted
+// if HighOpcode is used.
+void SystemZInstrInfo::expandRIPseudo(MachineInstr &MI, unsigned LowOpcode,
+ unsigned HighOpcode,
+ bool ConvertHigh) const {
+ unsigned Reg = MI.getOperand(0).getReg();
+ bool IsHigh = isHighReg(Reg);
+ MI.setDesc(get(IsHigh ? HighOpcode : LowOpcode));
+ if (IsHigh && ConvertHigh)
+ MI.getOperand(1).setImm(uint32_t(MI.getOperand(1).getImm()));
+}
+
+// MI is a three-operand RIE-style pseudo instruction. Replace it with
+// LowOpcodeK if the registers are both low GR32s, otherwise use a move
+// followed by HighOpcode or LowOpcode, depending on whether the target
+// is a high or low GR32.
+void SystemZInstrInfo::expandRIEPseudo(MachineInstr &MI, unsigned LowOpcode,
+ unsigned LowOpcodeK,
+ unsigned HighOpcode) const {
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ bool DestIsHigh = isHighReg(DestReg);
+ bool SrcIsHigh = isHighReg(SrcReg);
+ if (!DestIsHigh && !SrcIsHigh)
+ MI.setDesc(get(LowOpcodeK));
+ else {
+ emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(), DestReg, SrcReg,
+ SystemZ::LR, 32, MI.getOperand(1).isKill(),
+ MI.getOperand(1).isUndef());
+ MI.setDesc(get(DestIsHigh ? HighOpcode : LowOpcode));
+ MI.getOperand(1).setReg(DestReg);
+ MI.tieOperands(0, 1);
+ }
+}
+
+// MI is an RXY-style pseudo instruction. Replace it with LowOpcode
+// if the first operand is a low GR32 and HighOpcode if the first operand
+// is a high GR32.
+void SystemZInstrInfo::expandRXYPseudo(MachineInstr &MI, unsigned LowOpcode,
+ unsigned HighOpcode) const {
+ unsigned Reg = MI.getOperand(0).getReg();
+ unsigned Opcode = getOpcodeForOffset(isHighReg(Reg) ? HighOpcode : LowOpcode,
+ MI.getOperand(2).getImm());
+ MI.setDesc(get(Opcode));
+}
+
+// MI is a load-on-condition pseudo instruction with a single register
+// (source or destination) operand. Replace it with LowOpcode if the
+// register is a low GR32 and HighOpcode if the register is a high GR32.
+void SystemZInstrInfo::expandLOCPseudo(MachineInstr &MI, unsigned LowOpcode,
+ unsigned HighOpcode) const {
+ unsigned Reg = MI.getOperand(0).getReg();
+ unsigned Opcode = isHighReg(Reg) ? HighOpcode : LowOpcode;
+ MI.setDesc(get(Opcode));
+}
+
+// MI is a load-register-on-condition pseudo instruction. Replace it with
+// LowOpcode if source and destination are both low GR32s and HighOpcode if
+// source and destination are both high GR32s.
+void SystemZInstrInfo::expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode,
+ unsigned HighOpcode) const {
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = MI.getOperand(2).getReg();
+ bool DestIsHigh = isHighReg(DestReg);
+ bool SrcIsHigh = isHighReg(SrcReg);
+
+ if (!DestIsHigh && !SrcIsHigh)
+ MI.setDesc(get(LowOpcode));
+ else if (DestIsHigh && SrcIsHigh)
+ MI.setDesc(get(HighOpcode));
+
+ // If we were unable to implement the pseudo with a single instruction, we
+ // need to convert it back into a branch sequence. This cannot be done here
+ // since the caller of expandPostRAPseudo does not handle changes to the CFG
+  // correctly. This change is deferred to the SystemZExpandPseudo pass.
+}
+
+// MI is an RR-style pseudo instruction that zero-extends the low Size bits
+// of one GRX32 into another. Replace it with LowOpcode if both operands
+// are low registers, otherwise use RISB[LH]G.
+void SystemZInstrInfo::expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode,
+ unsigned Size) const {
+ MachineInstrBuilder MIB =
+ emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(),
+ MI.getOperand(0).getReg(), MI.getOperand(1).getReg(), LowOpcode,
+ Size, MI.getOperand(1).isKill(), MI.getOperand(1).isUndef());
+
+ // Keep the remaining operands as-is.
+ for (unsigned I = 2; I < MI.getNumOperands(); ++I)
+ MIB.add(MI.getOperand(I));
+
+ MI.eraseFromParent();
+}
+
+void SystemZInstrInfo::expandLoadStackGuard(MachineInstr *MI) const {
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineFunction &MF = *MBB->getParent();
+ const unsigned Reg64 = MI->getOperand(0).getReg();
+ const unsigned Reg32 = RI.getSubReg(Reg64, SystemZ::subreg_l32);
+
+  // EAR can only load into the low 32-bit subregister, so use a shift to
+  // move %a0 into the high half of the 64-bit GR before loading %a1,
+  // giving a GR that contains both %a0 and %a1.
+
+ // ear <reg>, %a0
+ BuildMI(*MBB, MI, MI->getDebugLoc(), get(SystemZ::EAR), Reg32)
+ .addReg(SystemZ::A0)
+ .addReg(Reg64, RegState::ImplicitDefine);
+
+ // sllg <reg>, <reg>, 32
+ BuildMI(*MBB, MI, MI->getDebugLoc(), get(SystemZ::SLLG), Reg64)
+ .addReg(Reg64)
+ .addReg(0)
+ .addImm(32);
+
+ // ear <reg>, %a1
+ BuildMI(*MBB, MI, MI->getDebugLoc(), get(SystemZ::EAR), Reg32)
+ .addReg(SystemZ::A1);
+
+ // lg <reg>, 40(<reg>)
+ MI->setDesc(get(SystemZ::LG));
+ MachineInstrBuilder(MF, MI).addReg(Reg64).addImm(40).addReg(0);
+}
+
+// Emit a zero-extending move from 32-bit GPR SrcReg to 32-bit GPR
+// DestReg before MBBI in MBB. Use LowLowOpcode when both DestReg and SrcReg
+// are low registers, otherwise use RISB[LH]G. Size is the number of bits
+// taken from the low end of SrcReg (8 for LLCR, 16 for LLHR and 32 for LR).
+// KillSrc is true if this move is the last use of SrcReg.
+MachineInstrBuilder
+SystemZInstrInfo::emitGRX32Move(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, unsigned DestReg,
+ unsigned SrcReg, unsigned LowLowOpcode,
+ unsigned Size, bool KillSrc,
+ bool UndefSrc) const {
+ unsigned Opcode;
+ bool DestIsHigh = isHighReg(DestReg);
+ bool SrcIsHigh = isHighReg(SrcReg);
+ if (DestIsHigh && SrcIsHigh)
+ Opcode = SystemZ::RISBHH;
+ else if (DestIsHigh && !SrcIsHigh)
+ Opcode = SystemZ::RISBHL;
+ else if (!DestIsHigh && SrcIsHigh)
+ Opcode = SystemZ::RISBLH;
+ else {
+ return BuildMI(MBB, MBBI, DL, get(LowLowOpcode), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc) | getUndefRegState(UndefSrc));
+ }
+ unsigned Rotate = (DestIsHigh != SrcIsHigh ? 32 : 0);
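+  // I3 = 32 - Size is the first bit of the field to keep, I4 = 128 + 31
+  // marks bit 31 as the last bit and sets the "zero remaining bits" flag
+  // (0x80), and I5 is the rotate amount.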
+ return BuildMI(MBB, MBBI, DL, get(Opcode), DestReg)
+ .addReg(DestReg, RegState::Undef)
+ .addReg(SrcReg, getKillRegState(KillSrc) | getUndefRegState(UndefSrc))
+ .addImm(32 - Size).addImm(128 + 31).addImm(Rotate);
+}
+
+MachineInstr *SystemZInstrInfo::commuteInstructionImpl(MachineInstr &MI,
+ bool NewMI,
+ unsigned OpIdx1,
+ unsigned OpIdx2) const {
+ auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & {
+ if (NewMI)
+ return *MI.getParent()->getParent()->CloneMachineInstr(&MI);
+ return MI;
+ };
+
+ switch (MI.getOpcode()) {
+ case SystemZ::LOCRMux:
+ case SystemZ::LOCFHR:
+ case SystemZ::LOCR:
+ case SystemZ::LOCGR: {
+ auto &WorkingMI = cloneIfNew(MI);
+ // Invert condition.
+ unsigned CCValid = WorkingMI.getOperand(3).getImm();
+ unsigned CCMask = WorkingMI.getOperand(4).getImm();
+ WorkingMI.getOperand(4).setImm(CCMask ^ CCValid);
+ return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
+ OpIdx1, OpIdx2);
+ }
+ default:
+ return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
+ }
+}
+
+// If MI is a simple load or store for a frame object, return the register
+// it loads or stores and set FrameIndex to the index of the frame object.
+// Return 0 otherwise.
+//
+// Flag is SimpleBDXLoad for loads and SimpleBDXStore for stores.
+static int isSimpleMove(const MachineInstr &MI, int &FrameIndex,
+ unsigned Flag) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ if ((MCID.TSFlags & Flag) && MI.getOperand(1).isFI() &&
+ MI.getOperand(2).getImm() == 0 && MI.getOperand(3).getReg() == 0) {
+ FrameIndex = MI.getOperand(1).getIndex();
+ return MI.getOperand(0).getReg();
+ }
+ return 0;
+}
+
+unsigned SystemZInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
+ int &FrameIndex) const {
+ return isSimpleMove(MI, FrameIndex, SystemZII::SimpleBDXLoad);
+}
+
+unsigned SystemZInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
+ int &FrameIndex) const {
+ return isSimpleMove(MI, FrameIndex, SystemZII::SimpleBDXStore);
+}
+
+bool SystemZInstrInfo::isStackSlotCopy(const MachineInstr &MI,
+ int &DestFrameIndex,
+ int &SrcFrameIndex) const {
+ // Check for MVC 0(Length,FI1),0(FI2)
+ const MachineFrameInfo &MFI = MI.getParent()->getParent()->getFrameInfo();
+ if (MI.getOpcode() != SystemZ::MVC || !MI.getOperand(0).isFI() ||
+ MI.getOperand(1).getImm() != 0 || !MI.getOperand(3).isFI() ||
+ MI.getOperand(4).getImm() != 0)
+ return false;
+
+ // Check that Length covers the full slots.
+ int64_t Length = MI.getOperand(2).getImm();
+ unsigned FI1 = MI.getOperand(0).getIndex();
+ unsigned FI2 = MI.getOperand(3).getIndex();
+ if (MFI.getObjectSize(FI1) != Length ||
+ MFI.getObjectSize(FI2) != Length)
+ return false;
+
+ DestFrameIndex = FI1;
+ SrcFrameIndex = FI2;
+ return true;
+}
+
+bool SystemZInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // Most of the code and comments here are boilerplate.
+
+ // Start from the bottom of the block and work up, examining the
+ // terminator instructions.
+ MachineBasicBlock::iterator I = MBB.end();
+ while (I != MBB.begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+
+ // Working from the bottom, when we see a non-terminator instruction, we're
+ // done.
+ if (!isUnpredicatedTerminator(*I))
+ break;
+
+ // A terminator that isn't a branch can't easily be handled by this
+ // analysis.
+ if (!I->isBranch())
+ return true;
+
+ // Can't handle indirect branches.
+ SystemZII::Branch Branch(getBranchInfo(*I));
+ if (!Branch.Target->isMBB())
+ return true;
+
+ // Punt on compound branches.
+ if (Branch.Type != SystemZII::BranchNormal)
+ return true;
+
+ if (Branch.CCMask == SystemZ::CCMASK_ANY) {
+ // Handle unconditional branches.
+ if (!AllowModify) {
+ TBB = Branch.Target->getMBB();
+ continue;
+ }
+
+ // If the block has any instructions after a JMP, delete them.
+ while (std::next(I) != MBB.end())
+ std::next(I)->eraseFromParent();
+
+ Cond.clear();
+ FBB = nullptr;
+
+ // Delete the JMP if it's equivalent to a fall-through.
+ if (MBB.isLayoutSuccessor(Branch.Target->getMBB())) {
+ TBB = nullptr;
+ I->eraseFromParent();
+ I = MBB.end();
+ continue;
+ }
+
+ // TBB is used to indicate the unconditional destination.
+ TBB = Branch.Target->getMBB();
+ continue;
+ }
+
+ // Working from the bottom, handle the first conditional branch.
+ if (Cond.empty()) {
+ // FIXME: add X86-style branch swap
+ FBB = TBB;
+ TBB = Branch.Target->getMBB();
+ Cond.push_back(MachineOperand::CreateImm(Branch.CCValid));
+ Cond.push_back(MachineOperand::CreateImm(Branch.CCMask));
+ continue;
+ }
+
+ // Handle subsequent conditional branches.
+ assert(Cond.size() == 2 && TBB && "Should have seen a conditional branch");
+
+ // Only handle the case where all conditional branches branch to the same
+ // destination.
+ if (TBB != Branch.Target->getMBB())
+ return true;
+
+ // If the conditions are the same, we can leave them alone.
+ unsigned OldCCValid = Cond[0].getImm();
+ unsigned OldCCMask = Cond[1].getImm();
+ if (OldCCValid == Branch.CCValid && OldCCMask == Branch.CCMask)
+ continue;
+
+ // FIXME: Try combining conditions like X86 does. Should be easy on Z!
+ return false;
+ }
+
+ return false;
+}
+
+unsigned SystemZInstrInfo::removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved) const {
+ assert(!BytesRemoved && "code size not handled");
+
+ // Most of the code and comments here are boilerplate.
+ MachineBasicBlock::iterator I = MBB.end();
+ unsigned Count = 0;
+
+ while (I != MBB.begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+ if (!I->isBranch())
+ break;
+ if (!getBranchInfo(*I).Target->isMBB())
+ break;
+ // Remove the branch.
+ I->eraseFromParent();
+ I = MBB.end();
+ ++Count;
+ }
+
+ return Count;
+}
+
+bool SystemZInstrInfo::
+reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ assert(Cond.size() == 2 && "Invalid condition");
+ Cond[1].setImm(Cond[1].getImm() ^ Cond[0].getImm());
+ return false;
+}
+
+unsigned SystemZInstrInfo::insertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ ArrayRef<MachineOperand> Cond,
+ const DebugLoc &DL,
+ int *BytesAdded) const {
+ // In this function we output 32-bit branches, which should always
+ // have enough range. They can be shortened and relaxed by later code
+ // in the pipeline, if desired.
+
+ // Shouldn't be a fall through.
+ assert(TBB && "insertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "SystemZ branch conditions have two components!");
+ assert(!BytesAdded && "code size not handled");
+
+ if (Cond.empty()) {
+ // Unconditional branch?
+ assert(!FBB && "Unconditional branch with multiple successors!");
+ BuildMI(&MBB, DL, get(SystemZ::J)).addMBB(TBB);
+ return 1;
+ }
+
+ // Conditional branch.
+ unsigned Count = 0;
+ unsigned CCValid = Cond[0].getImm();
+ unsigned CCMask = Cond[1].getImm();
+ BuildMI(&MBB, DL, get(SystemZ::BRC))
+ .addImm(CCValid).addImm(CCMask).addMBB(TBB);
+ ++Count;
+
+ if (FBB) {
+ // Two-way conditional branch. Insert the second branch.
+ BuildMI(&MBB, DL, get(SystemZ::J)).addMBB(FBB);
+ ++Count;
+ }
+ return Count;
+}
+
+bool SystemZInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
+ unsigned &SrcReg2, int &Mask,
+ int &Value) const {
+ assert(MI.isCompare() && "Caller should have checked for a comparison");
+
+ if (MI.getNumExplicitOperands() == 2 && MI.getOperand(0).isReg() &&
+ MI.getOperand(1).isImm()) {
+ SrcReg = MI.getOperand(0).getReg();
+ SrcReg2 = 0;
+ Value = MI.getOperand(1).getImm();
+ Mask = ~0;
+ return true;
+ }
+
+ return false;
+}
+
+// If Reg is a virtual register, return its definition, otherwise return null.
+static MachineInstr *getDef(unsigned Reg,
+ const MachineRegisterInfo *MRI) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return nullptr;
+ return MRI->getUniqueVRegDef(Reg);
+}
+
+// Return true if MI is a shift of type Opcode by Imm bits.
+static bool isShift(MachineInstr *MI, unsigned Opcode, int64_t Imm) {
+ return (MI->getOpcode() == Opcode &&
+ !MI->getOperand(2).getReg() &&
+ MI->getOperand(3).getImm() == Imm);
+}
+
+// If the destination of MI has no uses, delete it as dead.
+static void eraseIfDead(MachineInstr *MI, const MachineRegisterInfo *MRI) {
+ if (MRI->use_nodbg_empty(MI->getOperand(0).getReg()))
+ MI->eraseFromParent();
+}
+
+// Compare compares SrcReg against zero. Check whether SrcReg contains
+// the result of an IPM sequence whose input CC survives until Compare,
+// and whether Compare is therefore redundant. Delete it and return
+// true if so.
+static bool removeIPMBasedCompare(MachineInstr &Compare, unsigned SrcReg,
+ const MachineRegisterInfo *MRI,
+ const TargetRegisterInfo *TRI) {
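+ // Walk the chain SrcReg <- [LGFR] <- RLL(..., 31) <- SRL(..., IPM_CC) <- IPM;
+ // anything else means SrcReg is not a materialized copy of CC.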
+ MachineInstr *LGFR = nullptr;
+ MachineInstr *RLL = getDef(SrcReg, MRI);
+ if (RLL && RLL->getOpcode() == SystemZ::LGFR) {
+ LGFR = RLL;
+ RLL = getDef(LGFR->getOperand(1).getReg(), MRI);
+ }
+ if (!RLL || !isShift(RLL, SystemZ::RLL, 31))
+ return false;
+
+ MachineInstr *SRL = getDef(RLL->getOperand(1).getReg(), MRI);
+ if (!SRL || !isShift(SRL, SystemZ::SRL, SystemZ::IPM_CC))
+ return false;
+
+ MachineInstr *IPM = getDef(SRL->getOperand(1).getReg(), MRI);
+ if (!IPM || IPM->getOpcode() != SystemZ::IPM)
+ return false;
+
+ // Check that there are no assignments to CC between the IPM and Compare.
+ if (IPM->getParent() != Compare.getParent())
+ return false;
+ MachineBasicBlock::iterator MBBI = IPM, MBBE = Compare.getIterator();
+ for (++MBBI; MBBI != MBBE; ++MBBI) {
+ MachineInstr &MI = *MBBI;
+ if (MI.modifiesRegister(SystemZ::CC, TRI))
+ return false;
+ }
+
+ Compare.eraseFromParent();
+ if (LGFR)
+ eraseIfDead(LGFR, MRI);
+ eraseIfDead(RLL, MRI);
+ eraseIfDead(SRL, MRI);
+ eraseIfDead(IPM, MRI);
+
+ return true;
+}
+
+bool SystemZInstrInfo::optimizeCompareInstr(
+ MachineInstr &Compare, unsigned SrcReg, unsigned SrcReg2, int Mask,
+ int Value, const MachineRegisterInfo *MRI) const {
+ assert(!SrcReg2 && "Only optimizing constant comparisons so far");
+ bool IsLogical = (Compare.getDesc().TSFlags & SystemZII::IsLogical) != 0;
+ return Value == 0 && !IsLogical &&
+ removeIPMBasedCompare(Compare, SrcReg, MRI, &RI);
+}
+
+bool SystemZInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
+ ArrayRef<MachineOperand> Pred,
+ unsigned TrueReg, unsigned FalseReg,
+ int &CondCycles, int &TrueCycles,
+ int &FalseCycles) const {
+ // Not all subtargets have LOCR instructions.
+ if (!STI.hasLoadStoreOnCond())
+ return false;
+ if (Pred.size() != 2)
+ return false;
+
+ // Check register classes.
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetRegisterClass *RC =
+ RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
+ if (!RC)
+ return false;
+
+ // We have LOCR instructions for 32 and 64 bit general purpose registers.
+ if ((STI.hasLoadStoreOnCond2() &&
+ SystemZ::GRX32BitRegClass.hasSubClassEq(RC)) ||
+ SystemZ::GR32BitRegClass.hasSubClassEq(RC) ||
+ SystemZ::GR64BitRegClass.hasSubClassEq(RC)) {
+ CondCycles = 2;
+ TrueCycles = 2;
+ FalseCycles = 2;
+ return true;
+ }
+
+ // Can't do anything else.
+ return false;
+}
+
+void SystemZInstrInfo::insertSelect(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const DebugLoc &DL, unsigned DstReg,
+ ArrayRef<MachineOperand> Pred,
+ unsigned TrueReg,
+ unsigned FalseReg) const {
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
+
+ assert(Pred.size() == 2 && "Invalid condition");
+ unsigned CCValid = Pred[0].getImm();
+ unsigned CCMask = Pred[1].getImm();
+
+ unsigned Opc;
+ if (SystemZ::GRX32BitRegClass.hasSubClassEq(RC)) {
+ if (STI.hasLoadStoreOnCond2())
+ Opc = SystemZ::LOCRMux;
+ else {
+ Opc = SystemZ::LOCR;
+ MRI.constrainRegClass(DstReg, &SystemZ::GR32BitRegClass);
+ unsigned TReg = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
+ unsigned FReg = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
+ BuildMI(MBB, I, DL, get(TargetOpcode::COPY), TReg).addReg(TrueReg);
+ BuildMI(MBB, I, DL, get(TargetOpcode::COPY), FReg).addReg(FalseReg);
+ TrueReg = TReg;
+ FalseReg = FReg;
+ }
+ } else if (SystemZ::GR64BitRegClass.hasSubClassEq(RC))
+ Opc = SystemZ::LOCGR;
+ else
+ llvm_unreachable("Invalid register class");
+
+ BuildMI(MBB, I, DL, get(Opc), DstReg)
+ .addReg(FalseReg).addReg(TrueReg)
+ .addImm(CCValid).addImm(CCMask);
+}
+
+bool SystemZInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
+ unsigned Reg,
+ MachineRegisterInfo *MRI) const {
+ unsigned DefOpc = DefMI.getOpcode();
+ if (DefOpc != SystemZ::LHIMux && DefOpc != SystemZ::LHI &&
+ DefOpc != SystemZ::LGHI)
+ return false;
+ if (DefMI.getOperand(0).getReg() != Reg)
+ return false;
+ int32_t ImmVal = (int32_t)DefMI.getOperand(1).getImm();
+
+ unsigned UseOpc = UseMI.getOpcode();
+ unsigned NewUseOpc;
+ unsigned UseIdx;
+ int CommuteIdx = -1;
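+ // The LOCHI-style replacements take their immediate as operand 2, so if
+ // the constant currently feeds operand 1 we commute it into place first
+ // (commuteInstructionImpl above inverts the condition mask accordingly).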
+ switch (UseOpc) {
+ case SystemZ::LOCRMux:
+ if (!STI.hasLoadStoreOnCond2())
+ return false;
+ NewUseOpc = SystemZ::LOCHIMux;
+ if (UseMI.getOperand(2).getReg() == Reg)
+ UseIdx = 2;
+ else if (UseMI.getOperand(1).getReg() == Reg)
+ UseIdx = 2, CommuteIdx = 1;
+ else
+ return false;
+ break;
+ case SystemZ::LOCGR:
+ if (!STI.hasLoadStoreOnCond2())
+ return false;
+ NewUseOpc = SystemZ::LOCGHI;
+ if (UseMI.getOperand(2).getReg() == Reg)
+ UseIdx = 2;
+ else if (UseMI.getOperand(1).getReg() == Reg)
+ UseIdx = 2, CommuteIdx = 1;
+ else
+ return false;
+ break;
+ default:
+ return false;
+ }
+
+ if (CommuteIdx != -1)
+ if (!commuteInstruction(UseMI, false, CommuteIdx, UseIdx))
+ return false;
+
+ bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
+ UseMI.setDesc(get(NewUseOpc));
+ UseMI.getOperand(UseIdx).ChangeToImmediate(ImmVal);
+ if (DeleteDef)
+ DefMI.eraseFromParent();
+
+ return true;
+}
+
+bool SystemZInstrInfo::isPredicable(const MachineInstr &MI) const {
+ unsigned Opcode = MI.getOpcode();
+ if (Opcode == SystemZ::Return ||
+ Opcode == SystemZ::Trap ||
+ Opcode == SystemZ::CallJG ||
+ Opcode == SystemZ::CallBR)
+ return true;
+ return false;
+}
+
+bool SystemZInstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCycles, unsigned ExtraPredCycles,
+ BranchProbability Probability) const {
+ // Avoid using conditional returns at the end of a loop (since then
+ // we'd need to emit an unconditional branch to the beginning anyway,
+ // making the loop body longer). This doesn't apply for low-probability
+ // loops (e.g. compare-and-swap retry), so just decide based on branch
+ // probability instead of loop structure.
+ // However, since Compare and Trap instructions cost the same as a regular
+ // Compare instruction, we should allow the if conversion to convert this
+ // into a Conditional Compare regardless of the branch probability.
+ if (MBB.getLastNonDebugInstr()->getOpcode() != SystemZ::Trap &&
+ MBB.succ_empty() && Probability < BranchProbability(1, 8))
+ return false;
+ // For now only convert single instructions.
+ return NumCycles == 1;
+}
+
+bool SystemZInstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumCyclesT, unsigned ExtraPredCyclesT,
+ MachineBasicBlock &FMBB,
+ unsigned NumCyclesF, unsigned ExtraPredCyclesF,
+ BranchProbability Probability) const {
+ // For now avoid converting mutually-exclusive cases.
+ return false;
+}
+
+bool SystemZInstrInfo::
+isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
+ BranchProbability Probability) const {
+ // For now only duplicate single instructions.
+ return NumCycles == 1;
+}
+
+bool SystemZInstrInfo::PredicateInstruction(
+ MachineInstr &MI, ArrayRef<MachineOperand> Pred) const {
+ assert(Pred.size() == 2 && "Invalid condition");
+ unsigned CCValid = Pred[0].getImm();
+ unsigned CCMask = Pred[1].getImm();
+ assert(CCMask > 0 && CCMask < 15 && "Invalid predicate");
+ unsigned Opcode = MI.getOpcode();
+ if (Opcode == SystemZ::Trap) {
+ MI.setDesc(get(SystemZ::CondTrap));
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI)
+ .addImm(CCValid).addImm(CCMask)
+ .addReg(SystemZ::CC, RegState::Implicit);
+ return true;
+ }
+ if (Opcode == SystemZ::Return) {
+ MI.setDesc(get(SystemZ::CondReturn));
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI)
+ .addImm(CCValid).addImm(CCMask)
+ .addReg(SystemZ::CC, RegState::Implicit);
+ return true;
+ }
+ if (Opcode == SystemZ::CallJG) {
+ MachineOperand FirstOp = MI.getOperand(0);
+ const uint32_t *RegMask = MI.getOperand(1).getRegMask();
+ MI.RemoveOperand(1);
+ MI.RemoveOperand(0);
+ MI.setDesc(get(SystemZ::CallBRCL));
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI)
+ .addImm(CCValid)
+ .addImm(CCMask)
+ .add(FirstOp)
+ .addRegMask(RegMask)
+ .addReg(SystemZ::CC, RegState::Implicit);
+ return true;
+ }
+ if (Opcode == SystemZ::CallBR) {
+ const uint32_t *RegMask = MI.getOperand(0).getRegMask();
+ MI.RemoveOperand(0);
+ MI.setDesc(get(SystemZ::CallBCR));
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI)
+ .addImm(CCValid).addImm(CCMask)
+ .addRegMask(RegMask)
+ .addReg(SystemZ::CC, RegState::Implicit);
+ return true;
+ }
+ return false;
+}
+
+void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, unsigned DestReg,
+ unsigned SrcReg, bool KillSrc) const {
+ // Split 128-bit GPR moves into two 64-bit moves. Add implicit uses of the
+ // super register in case one of the subregs is undefined.
+ // This handles ADDR128 too.
+ if (SystemZ::GR128BitRegClass.contains(DestReg, SrcReg)) {
+ copyPhysReg(MBB, MBBI, DL, RI.getSubReg(DestReg, SystemZ::subreg_h64),
+ RI.getSubReg(SrcReg, SystemZ::subreg_h64), KillSrc);
+ MachineInstrBuilder(*MBB.getParent(), std::prev(MBBI))
+ .addReg(SrcReg, RegState::Implicit);
+ copyPhysReg(MBB, MBBI, DL, RI.getSubReg(DestReg, SystemZ::subreg_l64),
+ RI.getSubReg(SrcReg, SystemZ::subreg_l64), KillSrc);
+ MachineInstrBuilder(*MBB.getParent(), std::prev(MBBI))
+ .addReg(SrcReg, (getKillRegState(KillSrc) | RegState::Implicit));
+ return;
+ }
+
+ if (SystemZ::GRX32BitRegClass.contains(DestReg, SrcReg)) {
+ emitGRX32Move(MBB, MBBI, DL, DestReg, SrcReg, SystemZ::LR, 32, KillSrc,
+ false);
+ return;
+ }
+
+ // Everything else needs only one instruction.
+ unsigned Opcode;
+ if (SystemZ::GR64BitRegClass.contains(DestReg, SrcReg))
+ Opcode = SystemZ::LGR;
+ else if (SystemZ::FP32BitRegClass.contains(DestReg, SrcReg))
+ // For z13 we prefer LDR over LER to avoid partial register dependencies.
+ Opcode = STI.hasVector() ? SystemZ::LDR32 : SystemZ::LER;
+ else if (SystemZ::FP64BitRegClass.contains(DestReg, SrcReg))
+ Opcode = SystemZ::LDR;
+ else if (SystemZ::FP128BitRegClass.contains(DestReg, SrcReg))
+ Opcode = SystemZ::LXR;
+ else if (SystemZ::VR32BitRegClass.contains(DestReg, SrcReg))
+ Opcode = SystemZ::VLR32;
+ else if (SystemZ::VR64BitRegClass.contains(DestReg, SrcReg))
+ Opcode = SystemZ::VLR64;
+ else if (SystemZ::VR128BitRegClass.contains(DestReg, SrcReg))
+ Opcode = SystemZ::VLR;
+ else if (SystemZ::AR32BitRegClass.contains(DestReg, SrcReg))
+ Opcode = SystemZ::CPYA;
+ else if (SystemZ::AR32BitRegClass.contains(DestReg) &&
+ SystemZ::GR32BitRegClass.contains(SrcReg))
+ Opcode = SystemZ::SAR;
+ else if (SystemZ::GR32BitRegClass.contains(DestReg) &&
+ SystemZ::AR32BitRegClass.contains(SrcReg))
+ Opcode = SystemZ::EAR;
+ else
+ llvm_unreachable("Impossible reg-to-reg copy");
+
+ BuildMI(MBB, MBBI, DL, get(Opcode), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+}
+
+void SystemZInstrInfo::storeRegToStackSlot(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
+ bool isKill, int FrameIdx, const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Callers may expect a single instruction, so keep 128-bit moves
+ // together for now and lower them after register allocation.
+ unsigned LoadOpcode, StoreOpcode;
+ getLoadStoreOpcodes(RC, LoadOpcode, StoreOpcode);
+ addFrameReference(BuildMI(MBB, MBBI, DL, get(StoreOpcode))
+ .addReg(SrcReg, getKillRegState(isKill)),
+ FrameIdx);
+}
+
+void SystemZInstrInfo::loadRegFromStackSlot(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
+ int FrameIdx, const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Callers may expect a single instruction, so keep 128-bit moves
+ // together for now and lower them after register allocation.
+ unsigned LoadOpcode, StoreOpcode;
+ getLoadStoreOpcodes(RC, LoadOpcode, StoreOpcode);
+ addFrameReference(BuildMI(MBB, MBBI, DL, get(LoadOpcode), DestReg),
+ FrameIdx);
+}
+
+// Return true if MI is a simple load or store with a 12-bit displacement
+// and no index. Flag is SimpleBDXLoad for loads and SimpleBDXStore for stores.
+static bool isSimpleBD12Move(const MachineInstr *MI, unsigned Flag) {
+ const MCInstrDesc &MCID = MI->getDesc();
+ return ((MCID.TSFlags & Flag) &&
+ isUInt<12>(MI->getOperand(2).getImm()) &&
+ MI->getOperand(3).getReg() == 0);
+}
+
+namespace {
+
+struct LogicOp {
+ LogicOp() = default;
+ LogicOp(unsigned regSize, unsigned immLSB, unsigned immSize)
+ : RegSize(regSize), ImmLSB(immLSB), ImmSize(immSize) {}
+
+ explicit operator bool() const { return RegSize; }
+
+ unsigned RegSize = 0;
+ unsigned ImmLSB = 0;
+ unsigned ImmSize = 0;
+};
+
+} // end anonymous namespace
+
+static LogicOp interpretAndImmediate(unsigned Opcode) {
+ switch (Opcode) {
+ case SystemZ::NILMux: return LogicOp(32, 0, 16);
+ case SystemZ::NIHMux: return LogicOp(32, 16, 16);
+ case SystemZ::NILL64: return LogicOp(64, 0, 16);
+ case SystemZ::NILH64: return LogicOp(64, 16, 16);
+ case SystemZ::NIHL64: return LogicOp(64, 32, 16);
+ case SystemZ::NIHH64: return LogicOp(64, 48, 16);
+ case SystemZ::NIFMux: return LogicOp(32, 0, 32);
+ case SystemZ::NILF64: return LogicOp(64, 0, 32);
+ case SystemZ::NIHF64: return LogicOp(64, 32, 32);
+ default: return LogicOp();
+ }
+}
+
+static void transferDeadCC(MachineInstr *OldMI, MachineInstr *NewMI) {
+ if (OldMI->registerDefIsDead(SystemZ::CC)) {
+ MachineOperand *CCDef = NewMI->findRegisterDefOperand(SystemZ::CC);
+ if (CCDef != nullptr)
+ CCDef->setIsDead(true);
+ }
+}
+
+// Used to return from convertToThreeAddress after replacing two-address
+// instruction OldMI with three-address instruction NewMI.
+static MachineInstr *finishConvertToThreeAddress(MachineInstr *OldMI,
+ MachineInstr *NewMI,
+ LiveVariables *LV) {
+ if (LV) {
+ unsigned NumOps = OldMI->getNumOperands();
+ for (unsigned I = 1; I < NumOps; ++I) {
+ MachineOperand &Op = OldMI->getOperand(I);
+ if (Op.isReg() && Op.isKill())
+ LV->replaceKillInstruction(Op.getReg(), *OldMI, *NewMI);
+ }
+ }
+ transferDeadCC(OldMI, NewMI);
+ return NewMI;
+}
+
+MachineInstr *SystemZInstrInfo::convertToThreeAddress(
+ MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const {
+ MachineBasicBlock *MBB = MI.getParent();
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ unsigned Opcode = MI.getOpcode();
+ unsigned NumOps = MI.getNumOperands();
+
+ // Try to convert something like SLL into SLLK, if supported.
+ // We prefer to keep the two-operand form where possible both
+ // because it tends to be shorter and because some instructions
+ // have memory forms that can be used during spilling.
+ if (STI.hasDistinctOps()) {
+ MachineOperand &Dest = MI.getOperand(0);
+ MachineOperand &Src = MI.getOperand(1);
+ unsigned DestReg = Dest.getReg();
+ unsigned SrcReg = Src.getReg();
+ // AHIMux is only really a three-operand instruction when both operands
+ // are low registers. Try to constrain both operands to be low if
+ // possible.
+ if (Opcode == SystemZ::AHIMux &&
+ TargetRegisterInfo::isVirtualRegister(DestReg) &&
+ TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+ MRI.getRegClass(DestReg)->contains(SystemZ::R1L) &&
+ MRI.getRegClass(SrcReg)->contains(SystemZ::R1L)) {
+ MRI.constrainRegClass(DestReg, &SystemZ::GR32BitRegClass);
+ MRI.constrainRegClass(SrcReg, &SystemZ::GR32BitRegClass);
+ }
+ int ThreeOperandOpcode = SystemZ::getThreeOperandOpcode(Opcode);
+ if (ThreeOperandOpcode >= 0) {
+ // Create three address instruction without adding the implicit
+ // operands. Those will instead be copied over from the original
+ // instruction by the loop below.
+ MachineInstrBuilder MIB(
+ *MF, MF->CreateMachineInstr(get(ThreeOperandOpcode), MI.getDebugLoc(),
+ /*NoImplicit=*/true));
+ MIB.add(Dest);
+ // Keep the kill state, but drop the tied flag.
+ MIB.addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg());
+ // Keep the remaining operands as-is.
+ for (unsigned I = 2; I < NumOps; ++I)
+ MIB.add(MI.getOperand(I));
+ MBB->insert(MI, MIB);
+ return finishConvertToThreeAddress(&MI, MIB, LV);
+ }
+ }
+
+ // Try to convert an AND into an RISBG-type instruction.
+ if (LogicOp And = interpretAndImmediate(Opcode)) {
+ uint64_t Imm = MI.getOperand(2).getImm() << And.ImmLSB;
+ // AND IMMEDIATE leaves the other bits of the register unchanged.
+ Imm |= allOnes(And.RegSize) & ~(allOnes(And.ImmSize) << And.ImmLSB);
+ unsigned Start, End;
+ if (isRxSBGMask(Imm, And.RegSize, Start, End)) {
+ unsigned NewOpcode;
+ if (And.RegSize == 64) {
+ NewOpcode = SystemZ::RISBG;
+ // Prefer RISBGN if available, since it does not clobber CC.
+ if (STI.hasMiscellaneousExtensions())
+ NewOpcode = SystemZ::RISBGN;
+ } else {
+ NewOpcode = SystemZ::RISBMux;
+ Start &= 31;
+ End &= 31;
+ }
+ MachineOperand &Dest = MI.getOperand(0);
+ MachineOperand &Src = MI.getOperand(1);
+ MachineInstrBuilder MIB =
+ BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpcode))
+ .add(Dest)
+ .addReg(0)
+ .addReg(Src.getReg(), getKillRegState(Src.isKill()),
+ Src.getSubReg())
+ .addImm(Start)
+ .addImm(End + 128)
+ .addImm(0);
+ return finishConvertToThreeAddress(&MI, MIB, LV);
+ }
+ }
+ return nullptr;
+}
+
+MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
+ MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt, int FrameIndex,
+ LiveIntervals *LIS) const {
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ unsigned Size = MFI.getObjectSize(FrameIndex);
+ unsigned Opcode = MI.getOpcode();
+
+ if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
+ if (LIS != nullptr && (Opcode == SystemZ::LA || Opcode == SystemZ::LAY) &&
+ isInt<8>(MI.getOperand(2).getImm()) && !MI.getOperand(3).getReg()) {
+
+ // Check CC liveness, since the new instruction introduces a dead
+ // def of CC.
+ MCRegUnitIterator CCUnit(SystemZ::CC, TRI);
+ LiveRange &CCLiveRange = LIS->getRegUnit(*CCUnit);
+ ++CCUnit;
+ assert(!CCUnit.isValid() && "CC only has one reg unit.");
+ SlotIndex MISlot =
+ LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot();
+ if (!CCLiveRange.liveAt(MISlot)) {
+ // LA(Y) %reg, CONST(%reg) -> AGSI %mem, CONST
+ MachineInstr *BuiltMI = BuildMI(*InsertPt->getParent(), InsertPt,
+ MI.getDebugLoc(), get(SystemZ::AGSI))
+ .addFrameIndex(FrameIndex)
+ .addImm(0)
+ .addImm(MI.getOperand(2).getImm());
+ BuiltMI->findRegisterDefOperand(SystemZ::CC)->setIsDead(true);
+ CCLiveRange.createDeadDef(MISlot, LIS->getVNInfoAllocator());
+ return BuiltMI;
+ }
+ }
+ return nullptr;
+ }
+
+ // All other cases require a single operand.
+ if (Ops.size() != 1)
+ return nullptr;
+
+ unsigned OpNum = Ops[0];
+ assert(Size * 8 ==
+ TRI->getRegSizeInBits(*MF.getRegInfo()
+ .getRegClass(MI.getOperand(OpNum).getReg())) &&
+ "Invalid size combination");
+
+ if ((Opcode == SystemZ::AHI || Opcode == SystemZ::AGHI) && OpNum == 0 &&
+ isInt<8>(MI.getOperand(2).getImm())) {
+ // A(G)HI %reg, CONST -> A(G)SI %mem, CONST
+ Opcode = (Opcode == SystemZ::AHI ? SystemZ::ASI : SystemZ::AGSI);
+ MachineInstr *BuiltMI =
+ BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(), get(Opcode))
+ .addFrameIndex(FrameIndex)
+ .addImm(0)
+ .addImm(MI.getOperand(2).getImm());
+ transferDeadCC(&MI, BuiltMI);
+ return BuiltMI;
+ }
+
+ if (Opcode == SystemZ::LGDR || Opcode == SystemZ::LDGR) {
+ bool Op0IsGPR = (Opcode == SystemZ::LGDR);
+ bool Op1IsGPR = (Opcode == SystemZ::LDGR);
+ // If we're spilling the destination of an LDGR or LGDR, store the
+ // source register instead.
+ if (OpNum == 0) {
+ unsigned StoreOpcode = Op1IsGPR ? SystemZ::STG : SystemZ::STD;
+ return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(),
+ get(StoreOpcode))
+ .add(MI.getOperand(1))
+ .addFrameIndex(FrameIndex)
+ .addImm(0)
+ .addReg(0);
+ }
+ // If we're spilling the source of an LDGR or LGDR, load the
+ // destination register instead.
+ if (OpNum == 1) {
+ unsigned LoadOpcode = Op0IsGPR ? SystemZ::LG : SystemZ::LD;
+ return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(),
+ get(LoadOpcode))
+ .add(MI.getOperand(0))
+ .addFrameIndex(FrameIndex)
+ .addImm(0)
+ .addReg(0);
+ }
+ }
+
+ // Look for cases where the source of a simple store or the destination
+ // of a simple load is being spilled. Try to use MVC instead.
+ //
+ // Although MVC is in practice a fast choice in these cases, it is still
+ // logically a bytewise copy. This means that we cannot use it if the
+ // load or store is volatile. We also wouldn't be able to use MVC if
+ // the two memories partially overlap, but that case cannot occur here,
+ // because we know that one of the memories is a full frame index.
+ //
+ // For performance reasons, we also want to avoid using MVC if the addresses
+ // might be equal. We don't worry about that case here, because spill slot
+ // coloring happens later, and because we have special code to remove
+ // MVCs that turn out to be redundant.
+ if (OpNum == 0 && MI.hasOneMemOperand()) {
+ MachineMemOperand *MMO = *MI.memoperands_begin();
+ if (MMO->getSize() == Size && !MMO->isVolatile()) {
+ // Handle conversion of loads.
+ if (isSimpleBD12Move(&MI, SystemZII::SimpleBDXLoad)) {
+ return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(),
+ get(SystemZ::MVC))
+ .addFrameIndex(FrameIndex)
+ .addImm(0)
+ .addImm(Size)
+ .add(MI.getOperand(1))
+ .addImm(MI.getOperand(2).getImm())
+ .addMemOperand(MMO);
+ }
+ // Handle conversion of stores.
+ if (isSimpleBD12Move(&MI, SystemZII::SimpleBDXStore)) {
+ return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(),
+ get(SystemZ::MVC))
+ .add(MI.getOperand(1))
+ .addImm(MI.getOperand(2).getImm())
+ .addImm(Size)
+ .addFrameIndex(FrameIndex)
+ .addImm(0)
+ .addMemOperand(MMO);
+ }
+ }
+ }
+
+ // If the spilled operand is the final one, try to change <INSN>R
+ // into <INSN>.
+ int MemOpcode = SystemZ::getMemOpcode(Opcode);
+ if (MemOpcode >= 0) {
+ unsigned NumOps = MI.getNumExplicitOperands();
+ if (OpNum == NumOps - 1) {
+ const MCInstrDesc &MemDesc = get(MemOpcode);
+ uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags);
+ assert(AccessBytes != 0 && "Size of access should be known");
+ assert(AccessBytes <= Size && "Access outside the frame index");
+ uint64_t Offset = Size - AccessBytes;
+ MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt,
+ MI.getDebugLoc(), get(MemOpcode));
+ for (unsigned I = 0; I < OpNum; ++I)
+ MIB.add(MI.getOperand(I));
+ MIB.addFrameIndex(FrameIndex).addImm(Offset);
+ if (MemDesc.TSFlags & SystemZII::HasIndex)
+ MIB.addReg(0);
+ transferDeadCC(&MI, MIB);
+ return MIB;
+ }
+ }
+
+ return nullptr;
+}
+
+MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
+ MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI,
+ LiveIntervals *LIS) const {
+ return nullptr;
+}
+
+bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
+ case SystemZ::L128:
+ splitMove(MI, SystemZ::LG);
+ return true;
+
+ case SystemZ::ST128:
+ splitMove(MI, SystemZ::STG);
+ return true;
+
+ case SystemZ::LX:
+ splitMove(MI, SystemZ::LD);
+ return true;
+
+ case SystemZ::STX:
+ splitMove(MI, SystemZ::STD);
+ return true;
+
+ case SystemZ::LBMux:
+ expandRXYPseudo(MI, SystemZ::LB, SystemZ::LBH);
+ return true;
+
+ case SystemZ::LHMux:
+ expandRXYPseudo(MI, SystemZ::LH, SystemZ::LHH);
+ return true;
+
+ case SystemZ::LLCRMux:
+ expandZExtPseudo(MI, SystemZ::LLCR, 8);
+ return true;
+
+ case SystemZ::LLHRMux:
+ expandZExtPseudo(MI, SystemZ::LLHR, 16);
+ return true;
+
+ case SystemZ::LLCMux:
+ expandRXYPseudo(MI, SystemZ::LLC, SystemZ::LLCH);
+ return true;
+
+ case SystemZ::LLHMux:
+ expandRXYPseudo(MI, SystemZ::LLH, SystemZ::LLHH);
+ return true;
+
+ case SystemZ::LMux:
+ expandRXYPseudo(MI, SystemZ::L, SystemZ::LFH);
+ return true;
+
+ case SystemZ::LOCMux:
+ expandLOCPseudo(MI, SystemZ::LOC, SystemZ::LOCFH);
+ return true;
+
+ case SystemZ::LOCHIMux:
+ expandLOCPseudo(MI, SystemZ::LOCHI, SystemZ::LOCHHI);
+ return true;
+
+ case SystemZ::LOCRMux:
+ expandLOCRPseudo(MI, SystemZ::LOCR, SystemZ::LOCFHR);
+ return true;
+
+ case SystemZ::STCMux:
+ expandRXYPseudo(MI, SystemZ::STC, SystemZ::STCH);
+ return true;
+
+ case SystemZ::STHMux:
+ expandRXYPseudo(MI, SystemZ::STH, SystemZ::STHH);
+ return true;
+
+ case SystemZ::STMux:
+ expandRXYPseudo(MI, SystemZ::ST, SystemZ::STFH);
+ return true;
+
+ case SystemZ::STOCMux:
+ expandLOCPseudo(MI, SystemZ::STOC, SystemZ::STOCFH);
+ return true;
+
+ case SystemZ::LHIMux:
+ expandRIPseudo(MI, SystemZ::LHI, SystemZ::IIHF, true);
+ return true;
+
+ case SystemZ::IIFMux:
+ expandRIPseudo(MI, SystemZ::IILF, SystemZ::IIHF, false);
+ return true;
+
+ case SystemZ::IILMux:
+ expandRIPseudo(MI, SystemZ::IILL, SystemZ::IIHL, false);
+ return true;
+
+ case SystemZ::IIHMux:
+ expandRIPseudo(MI, SystemZ::IILH, SystemZ::IIHH, false);
+ return true;
+
+ case SystemZ::NIFMux:
+ expandRIPseudo(MI, SystemZ::NILF, SystemZ::NIHF, false);
+ return true;
+
+ case SystemZ::NILMux:
+ expandRIPseudo(MI, SystemZ::NILL, SystemZ::NIHL, false);
+ return true;
+
+ case SystemZ::NIHMux:
+ expandRIPseudo(MI, SystemZ::NILH, SystemZ::NIHH, false);
+ return true;
+
+ case SystemZ::OIFMux:
+ expandRIPseudo(MI, SystemZ::OILF, SystemZ::OIHF, false);
+ return true;
+
+ case SystemZ::OILMux:
+ expandRIPseudo(MI, SystemZ::OILL, SystemZ::OIHL, false);
+ return true;
+
+ case SystemZ::OIHMux:
+ expandRIPseudo(MI, SystemZ::OILH, SystemZ::OIHH, false);
+ return true;
+
+ case SystemZ::XIFMux:
+ expandRIPseudo(MI, SystemZ::XILF, SystemZ::XIHF, false);
+ return true;
+
+ case SystemZ::TMLMux:
+ expandRIPseudo(MI, SystemZ::TMLL, SystemZ::TMHL, false);
+ return true;
+
+ case SystemZ::TMHMux:
+ expandRIPseudo(MI, SystemZ::TMLH, SystemZ::TMHH, false);
+ return true;
+
+ case SystemZ::AHIMux:
+ expandRIPseudo(MI, SystemZ::AHI, SystemZ::AIH, false);
+ return true;
+
+ case SystemZ::AHIMuxK:
+ expandRIEPseudo(MI, SystemZ::AHI, SystemZ::AHIK, SystemZ::AIH);
+ return true;
+
+ case SystemZ::AFIMux:
+ expandRIPseudo(MI, SystemZ::AFI, SystemZ::AIH, false);
+ return true;
+
+ case SystemZ::CHIMux:
+ expandRIPseudo(MI, SystemZ::CHI, SystemZ::CIH, false);
+ return true;
+
+ case SystemZ::CFIMux:
+ expandRIPseudo(MI, SystemZ::CFI, SystemZ::CIH, false);
+ return true;
+
+ case SystemZ::CLFIMux:
+ expandRIPseudo(MI, SystemZ::CLFI, SystemZ::CLIH, false);
+ return true;
+
+ case SystemZ::CMux:
+ expandRXYPseudo(MI, SystemZ::C, SystemZ::CHF);
+ return true;
+
+ case SystemZ::CLMux:
+ expandRXYPseudo(MI, SystemZ::CL, SystemZ::CLHF);
+ return true;
+
+ case SystemZ::RISBMux: {
+ bool DestIsHigh = isHighReg(MI.getOperand(0).getReg());
+ bool SrcIsHigh = isHighReg(MI.getOperand(2).getReg());
+ if (SrcIsHigh == DestIsHigh)
+ MI.setDesc(get(DestIsHigh ? SystemZ::RISBHH : SystemZ::RISBLL));
+ else {
+ MI.setDesc(get(DestIsHigh ? SystemZ::RISBHL : SystemZ::RISBLH));
+ MI.getOperand(5).setImm(MI.getOperand(5).getImm() ^ 32);
+ }
+ return true;
+ }
+
+ case SystemZ::ADJDYNALLOC:
+ splitAdjDynAlloc(MI);
+ return true;
+
+ case TargetOpcode::LOAD_STACK_GUARD:
+ expandLoadStackGuard(&MI);
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+unsigned SystemZInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
+ if (MI.getOpcode() == TargetOpcode::INLINEASM) {
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const char *AsmStr = MI.getOperand(0).getSymbolName();
+ return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
+ }
+ return MI.getDesc().getSize();
+}
+
+SystemZII::Branch
+SystemZInstrInfo::getBranchInfo(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
+ case SystemZ::BR:
+ case SystemZ::J:
+ case SystemZ::JG:
+ return SystemZII::Branch(SystemZII::BranchNormal, SystemZ::CCMASK_ANY,
+ SystemZ::CCMASK_ANY, &MI.getOperand(0));
+
+ case SystemZ::BRC:
+ case SystemZ::BRCL:
+ return SystemZII::Branch(SystemZII::BranchNormal, MI.getOperand(0).getImm(),
+ MI.getOperand(1).getImm(), &MI.getOperand(2));
+
+ case SystemZ::BRCT:
+ case SystemZ::BRCTH:
+ return SystemZII::Branch(SystemZII::BranchCT, SystemZ::CCMASK_ICMP,
+ SystemZ::CCMASK_CMP_NE, &MI.getOperand(2));
+
+ case SystemZ::BRCTG:
+ return SystemZII::Branch(SystemZII::BranchCTG, SystemZ::CCMASK_ICMP,
+ SystemZ::CCMASK_CMP_NE, &MI.getOperand(2));
+
+ case SystemZ::CIJ:
+ case SystemZ::CRJ:
+ return SystemZII::Branch(SystemZII::BranchC, SystemZ::CCMASK_ICMP,
+ MI.getOperand(2).getImm(), &MI.getOperand(3));
+
+ case SystemZ::CLIJ:
+ case SystemZ::CLRJ:
+ return SystemZII::Branch(SystemZII::BranchCL, SystemZ::CCMASK_ICMP,
+ MI.getOperand(2).getImm(), &MI.getOperand(3));
+
+ case SystemZ::CGIJ:
+ case SystemZ::CGRJ:
+ return SystemZII::Branch(SystemZII::BranchCG, SystemZ::CCMASK_ICMP,
+ MI.getOperand(2).getImm(), &MI.getOperand(3));
+
+ case SystemZ::CLGIJ:
+ case SystemZ::CLGRJ:
+ return SystemZII::Branch(SystemZII::BranchCLG, SystemZ::CCMASK_ICMP,
+ MI.getOperand(2).getImm(), &MI.getOperand(3));
+
+ default:
+ llvm_unreachable("Unrecognized branch opcode");
+ }
+}
+
+void SystemZInstrInfo::getLoadStoreOpcodes(const TargetRegisterClass *RC,
+ unsigned &LoadOpcode,
+ unsigned &StoreOpcode) const {
+ if (RC == &SystemZ::GR32BitRegClass || RC == &SystemZ::ADDR32BitRegClass) {
+ LoadOpcode = SystemZ::L;
+ StoreOpcode = SystemZ::ST;
+ } else if (RC == &SystemZ::GRH32BitRegClass) {
+ LoadOpcode = SystemZ::LFH;
+ StoreOpcode = SystemZ::STFH;
+ } else if (RC == &SystemZ::GRX32BitRegClass) {
+ LoadOpcode = SystemZ::LMux;
+ StoreOpcode = SystemZ::STMux;
+ } else if (RC == &SystemZ::GR64BitRegClass ||
+ RC == &SystemZ::ADDR64BitRegClass) {
+ LoadOpcode = SystemZ::LG;
+ StoreOpcode = SystemZ::STG;
+ } else if (RC == &SystemZ::GR128BitRegClass ||
+ RC == &SystemZ::ADDR128BitRegClass) {
+ LoadOpcode = SystemZ::L128;
+ StoreOpcode = SystemZ::ST128;
+ } else if (RC == &SystemZ::FP32BitRegClass) {
+ LoadOpcode = SystemZ::LE;
+ StoreOpcode = SystemZ::STE;
+ } else if (RC == &SystemZ::FP64BitRegClass) {
+ LoadOpcode = SystemZ::LD;
+ StoreOpcode = SystemZ::STD;
+ } else if (RC == &SystemZ::FP128BitRegClass) {
+ LoadOpcode = SystemZ::LX;
+ StoreOpcode = SystemZ::STX;
+ } else if (RC == &SystemZ::VR32BitRegClass) {
+ LoadOpcode = SystemZ::VL32;
+ StoreOpcode = SystemZ::VST32;
+ } else if (RC == &SystemZ::VR64BitRegClass) {
+ LoadOpcode = SystemZ::VL64;
+ StoreOpcode = SystemZ::VST64;
+ } else if (RC == &SystemZ::VF128BitRegClass ||
+ RC == &SystemZ::VR128BitRegClass) {
+ LoadOpcode = SystemZ::VL;
+ StoreOpcode = SystemZ::VST;
+ } else
+ llvm_unreachable("Unsupported regclass to load or store");
+}
+
+unsigned SystemZInstrInfo::getOpcodeForOffset(unsigned Opcode,
+ int64_t Offset) const {
+ const MCInstrDesc &MCID = get(Opcode);
+ int64_t Offset2 = (MCID.TSFlags & SystemZII::Is128Bit ? Offset + 8 : Offset);
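+ // For 128-bit operations both 64-bit halves must be addressable, so the
+ // displacement of the second half (Offset + 8) is range-checked as well.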
+ if (isUInt<12>(Offset) && isUInt<12>(Offset2)) {
+ // Get the instruction to use for unsigned 12-bit displacements.
+ int Disp12Opcode = SystemZ::getDisp12Opcode(Opcode);
+ if (Disp12Opcode >= 0)
+ return Disp12Opcode;
+
+ // All address-related instructions can use unsigned 12-bit
+ // displacements.
+ return Opcode;
+ }
+ if (isInt<20>(Offset) && isInt<20>(Offset2)) {
+ // Get the instruction to use for signed 20-bit displacements.
+ int Disp20Opcode = SystemZ::getDisp20Opcode(Opcode);
+ if (Disp20Opcode >= 0)
+ return Disp20Opcode;
+
+ // Check whether Opcode allows signed 20-bit displacements.
+ if (MCID.TSFlags & SystemZII::Has20BitOffset)
+ return Opcode;
+ }
+ return 0;
+}
+
+unsigned SystemZInstrInfo::getLoadAndTest(unsigned Opcode) const {
+ switch (Opcode) {
+ case SystemZ::L: return SystemZ::LT;
+ case SystemZ::LY: return SystemZ::LT;
+ case SystemZ::LG: return SystemZ::LTG;
+ case SystemZ::LGF: return SystemZ::LTGF;
+ case SystemZ::LR: return SystemZ::LTR;
+ case SystemZ::LGFR: return SystemZ::LTGFR;
+ case SystemZ::LGR: return SystemZ::LTGR;
+ case SystemZ::LER: return SystemZ::LTEBR;
+ case SystemZ::LDR: return SystemZ::LTDBR;
+ case SystemZ::LXR: return SystemZ::LTXBR;
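+ // The FP sign-manipulation instructions below do not set CC; the BFP
+ // forms returned here perform the same operation and do set it.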
+ case SystemZ::LCDFR: return SystemZ::LCDBR;
+ case SystemZ::LPDFR: return SystemZ::LPDBR;
+ case SystemZ::LNDFR: return SystemZ::LNDBR;
+ case SystemZ::LCDFR_32: return SystemZ::LCEBR;
+ case SystemZ::LPDFR_32: return SystemZ::LPEBR;
+ case SystemZ::LNDFR_32: return SystemZ::LNEBR;
+ // On zEC12 we prefer to use RISBGN. But if there is a chance to
+ // actually use the condition code, we may turn it back into RISBG.
+ // Note that RISBG is not really a "load-and-test" instruction,
+ // but sets the same condition code values, so is OK to use here.
+ case SystemZ::RISBGN: return SystemZ::RISBG;
+ default: return 0;
+ }
+}
+
+// Return true if Mask matches the regexp 0*1+0*, given that zero masks
+// have already been filtered out. Store the first set bit in LSB and
+// the number of set bits in Length if so.
+static bool isStringOfOnes(uint64_t Mask, unsigned &LSB, unsigned &Length) {
+ unsigned First = findFirstSet(Mask);
+ uint64_t Top = (Mask >> First) + 1;
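+ // If the set bits form one contiguous run, (Mask >> First) has the form
+ // 0*1+, so Top is a power of two; (Top & -Top) == Top tests exactly that.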
+ if ((Top & -Top) == Top) {
+ LSB = First;
+ Length = findFirstSet(Top);
+ return true;
+ }
+ return false;
+}
+
+bool SystemZInstrInfo::isRxSBGMask(uint64_t Mask, unsigned BitSize,
+ unsigned &Start, unsigned &End) const {
+ // Reject trivial all-zero masks.
+ Mask &= allOnes(BitSize);
+ if (Mask == 0)
+ return false;
+
+ // Handle the 1+0+ or 0+1+0* cases. Start then specifies the index of
+ // the msb and End specifies the index of the lsb.
+ unsigned LSB, Length;
+ if (isStringOfOnes(Mask, LSB, Length)) {
+ Start = 63 - (LSB + Length - 1);
+ End = 63 - LSB;
+ return true;
+ }
+
+ // Handle the wrap-around 1+0+1+ cases. Start then specifies the msb
+ // of the low 1s and End specifies the lsb of the high 1s.
+ if (isStringOfOnes(Mask ^ allOnes(BitSize), LSB, Length)) {
+ assert(LSB > 0 && "Bottom bit must be set");
+ assert(LSB + Length < BitSize && "Top bit must be set");
+ Start = 63 - (LSB - 1);
+ End = 63 - (LSB + Length);
+ return true;
+ }
+
+ return false;
+}
+
+unsigned SystemZInstrInfo::getFusedCompare(unsigned Opcode,
+ SystemZII::FusedCompareType Type,
+ const MachineInstr *MI) const {
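+ // First reject compares whose operands cannot be encoded in the fused
+ // form: the immediate variants only have an 8-bit immediate field, and
+ // the memory forms (CLT/CLGT) have no index register.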
+ switch (Opcode) {
+ case SystemZ::CHI:
+ case SystemZ::CGHI:
+ if (!(MI && isInt<8>(MI->getOperand(1).getImm())))
+ return 0;
+ break;
+ case SystemZ::CLFI:
+ case SystemZ::CLGFI:
+ if (!(MI && isUInt<8>(MI->getOperand(1).getImm())))
+ return 0;
+ break;
+ case SystemZ::CL:
+ case SystemZ::CLG:
+ if (!STI.hasMiscellaneousExtensions())
+ return 0;
+ if (!(MI && MI->getOperand(3).getReg() == 0))
+ return 0;
+ break;
+ }
+ switch (Type) {
+ case SystemZII::CompareAndBranch:
+ switch (Opcode) {
+ case SystemZ::CR:
+ return SystemZ::CRJ;
+ case SystemZ::CGR:
+ return SystemZ::CGRJ;
+ case SystemZ::CHI:
+ return SystemZ::CIJ;
+ case SystemZ::CGHI:
+ return SystemZ::CGIJ;
+ case SystemZ::CLR:
+ return SystemZ::CLRJ;
+ case SystemZ::CLGR:
+ return SystemZ::CLGRJ;
+ case SystemZ::CLFI:
+ return SystemZ::CLIJ;
+ case SystemZ::CLGFI:
+ return SystemZ::CLGIJ;
+ default:
+ return 0;
+ }
+ case SystemZII::CompareAndReturn:
+ switch (Opcode) {
+ case SystemZ::CR:
+ return SystemZ::CRBReturn;
+ case SystemZ::CGR:
+ return SystemZ::CGRBReturn;
+ case SystemZ::CHI:
+ return SystemZ::CIBReturn;
+ case SystemZ::CGHI:
+ return SystemZ::CGIBReturn;
+ case SystemZ::CLR:
+ return SystemZ::CLRBReturn;
+ case SystemZ::CLGR:
+ return SystemZ::CLGRBReturn;
+ case SystemZ::CLFI:
+ return SystemZ::CLIBReturn;
+ case SystemZ::CLGFI:
+ return SystemZ::CLGIBReturn;
+ default:
+ return 0;
+ }
+ case SystemZII::CompareAndSibcall:
+ switch (Opcode) {
+ case SystemZ::CR:
+ return SystemZ::CRBCall;
+ case SystemZ::CGR:
+ return SystemZ::CGRBCall;
+ case SystemZ::CHI:
+ return SystemZ::CIBCall;
+ case SystemZ::CGHI:
+ return SystemZ::CGIBCall;
+ case SystemZ::CLR:
+ return SystemZ::CLRBCall;
+ case SystemZ::CLGR:
+ return SystemZ::CLGRBCall;
+ case SystemZ::CLFI:
+ return SystemZ::CLIBCall;
+ case SystemZ::CLGFI:
+ return SystemZ::CLGIBCall;
+ default:
+ return 0;
+ }
+ case SystemZII::CompareAndTrap:
+ switch (Opcode) {
+ case SystemZ::CR:
+ return SystemZ::CRT;
+ case SystemZ::CGR:
+ return SystemZ::CGRT;
+ case SystemZ::CHI:
+ return SystemZ::CIT;
+ case SystemZ::CGHI:
+ return SystemZ::CGIT;
+ case SystemZ::CLR:
+ return SystemZ::CLRT;
+ case SystemZ::CLGR:
+ return SystemZ::CLGRT;
+ case SystemZ::CLFI:
+ return SystemZ::CLFIT;
+ case SystemZ::CLGFI:
+ return SystemZ::CLGIT;
+ case SystemZ::CL:
+ return SystemZ::CLT;
+ case SystemZ::CLG:
+ return SystemZ::CLGT;
+ default:
+ return 0;
+ }
+ }
+ return 0;
+}
+
+unsigned SystemZInstrInfo::getLoadAndTrap(unsigned Opcode) const {
+ if (!STI.hasLoadAndTrap())
+ return 0;
+ switch (Opcode) {
+ case SystemZ::L:
+ case SystemZ::LY:
+ return SystemZ::LAT;
+ case SystemZ::LG:
+ return SystemZ::LGAT;
+ case SystemZ::LFH:
+ return SystemZ::LFHAT;
+ case SystemZ::LLGF:
+ return SystemZ::LLGFAT;
+ case SystemZ::LLGT:
+ return SystemZ::LLGTAT;
+ }
+ return 0;
+}
+
+void SystemZInstrInfo::loadImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned Reg, uint64_t Value) const {
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ unsigned Opcode;
+ if (isInt<16>(Value))
+ Opcode = SystemZ::LGHI;
+ else if (SystemZ::isImmLL(Value))
+ Opcode = SystemZ::LLILL;
+ else if (SystemZ::isImmLH(Value)) {
+ Opcode = SystemZ::LLILH;
+ Value >>= 16;
+ } else {
+ assert(isInt<32>(Value) && "Huge values not handled yet");
+ Opcode = SystemZ::LGFI;
+ }
+ BuildMI(MBB, MBBI, DL, get(Opcode), Reg).addImm(Value);
+}
+
+bool SystemZInstrInfo::
+areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb,
+ AliasAnalysis *AA) const {
+
+ if (!MIa.hasOneMemOperand() || !MIb.hasOneMemOperand())
+ return false;
+
+ // If mem-operands show that the same address Value is used by both
+ // instructions, check for non-overlapping offsets and widths. Not
+ // sure if a register based analysis would be an improvement...
+
+ MachineMemOperand *MMOa = *MIa.memoperands_begin();
+ MachineMemOperand *MMOb = *MIb.memoperands_begin();
+ const Value *VALa = MMOa->getValue();
+ const Value *VALb = MMOb->getValue();
+ bool SameVal = (VALa && VALb && (VALa == VALb));
+ if (!SameVal) {
+ const PseudoSourceValue *PSVa = MMOa->getPseudoValue();
+ const PseudoSourceValue *PSVb = MMOb->getPseudoValue();
+ if (PSVa && PSVb && (PSVa == PSVb))
+ SameVal = true;
+ }
+ if (SameVal) {
+ int OffsetA = MMOa->getOffset(), OffsetB = MMOb->getOffset();
+ int WidthA = MMOa->getSize(), WidthB = MMOb->getSize();
+ int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
+ int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
+ int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
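+ // The accesses are disjoint if the lower one ends at or before the
+ // higher one starts.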
+ if (LowOffset + LowWidth <= HighOffset)
+ return true;
+ }
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
new file mode 100644
index 000000000000..b8be1f5f3921
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -0,0 +1,326 @@
+//===-- SystemZInstrInfo.h - SystemZ instruction information ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SystemZ implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZINSTRINFO_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZINSTRINFO_H
+
+#include "SystemZ.h"
+#include "SystemZRegisterInfo.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include <cstdint>
+
+#define GET_INSTRINFO_HEADER
+#include "SystemZGenInstrInfo.inc"
+
+namespace llvm {
+
+class SystemZSubtarget;
+
+namespace SystemZII {
+
+enum {
+ // See comments in SystemZInstrFormats.td.
+ SimpleBDXLoad = (1 << 0),
+ SimpleBDXStore = (1 << 1),
+ Has20BitOffset = (1 << 2),
+ HasIndex = (1 << 3),
+ Is128Bit = (1 << 4),
+ AccessSizeMask = (31 << 5),
+ AccessSizeShift = 5,
+ CCValuesMask = (15 << 10),
+ CCValuesShift = 10,
+ CompareZeroCCMaskMask = (15 << 14),
+ CompareZeroCCMaskShift = 14,
+ CCMaskFirst = (1 << 18),
+ CCMaskLast = (1 << 19),
+ IsLogical = (1 << 20)
+};
+
+static inline unsigned getAccessSize(unsigned int Flags) {
+ return (Flags & AccessSizeMask) >> AccessSizeShift;
+}
+
+static inline unsigned getCCValues(unsigned int Flags) {
+ return (Flags & CCValuesMask) >> CCValuesShift;
+}
+
+static inline unsigned getCompareZeroCCMask(unsigned int Flags) {
+ return (Flags & CompareZeroCCMaskMask) >> CompareZeroCCMaskShift;
+}
+
+// SystemZ MachineOperand target flags.
+enum {
+ // Masks out the bits for the access model.
+ MO_SYMBOL_MODIFIER = (3 << 0),
+
+ // @GOT (aka @GOTENT)
+ MO_GOT = (1 << 0),
+
+ // @INDNTPOFF
+ MO_INDNTPOFF = (2 << 0)
+};
+
+// Classifies a branch.
+enum BranchType {
+ // An instruction that branches on the current value of CC.
+ BranchNormal,
+
+ // An instruction that performs a 32-bit signed comparison and branches
+ // on the result.
+ BranchC,
+
+ // An instruction that performs a 32-bit unsigned comparison and branches
+ // on the result.
+ BranchCL,
+
+ // An instruction that performs a 64-bit signed comparison and branches
+ // on the result.
+ BranchCG,
+
+ // An instruction that performs a 64-bit unsigned comparison and branches
+ // on the result.
+ BranchCLG,
+
+ // An instruction that decrements a 32-bit register and branches if
+ // the result is nonzero.
+ BranchCT,
+
+ // An instruction that decrements a 64-bit register and branches if
+ // the result is nonzero.
+ BranchCTG
+};
+
+// Information about a branch instruction.
+struct Branch {
+ // The type of the branch.
+ BranchType Type;
+
+ // CCMASK_<N> is set if CC might be equal to N.
+ unsigned CCValid;
+
+ // CCMASK_<N> is set if the branch should be taken when CC == N.
+ unsigned CCMask;
+
+ // The target of the branch.
+ const MachineOperand *Target;
+
+ Branch(BranchType type, unsigned ccValid, unsigned ccMask,
+ const MachineOperand *target)
+ : Type(type), CCValid(ccValid), CCMask(ccMask), Target(target) {}
+};
+
+// Kinds of fused compares in compare-and-* instructions. Together with the
+// type of the compare being converted, this identifies the compare-and-*
+// instruction.
+enum FusedCompareType {
+ // Relative branch - CRJ etc.
+ CompareAndBranch,
+
+ // Indirect branch, used for return - CRBReturn etc.
+ CompareAndReturn,
+
+ // Indirect branch, used for sibcall - CRBCall etc.
+ CompareAndSibcall,
+
+ // Trap
+ CompareAndTrap
+};
+
+} // end namespace SystemZII
+
+class SystemZInstrInfo : public SystemZGenInstrInfo {
+ const SystemZRegisterInfo RI;
+ SystemZSubtarget &STI;
+
+ void splitMove(MachineBasicBlock::iterator MI, unsigned NewOpcode) const;
+ void splitAdjDynAlloc(MachineBasicBlock::iterator MI) const;
+ void expandRIPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned HighOpcode,
+ bool ConvertHigh) const;
+ void expandRIEPseudo(MachineInstr &MI, unsigned LowOpcode,
+ unsigned LowOpcodeK, unsigned HighOpcode) const;
+ void expandRXYPseudo(MachineInstr &MI, unsigned LowOpcode,
+ unsigned HighOpcode) const;
+ void expandLOCPseudo(MachineInstr &MI, unsigned LowOpcode,
+ unsigned HighOpcode) const;
+ void expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode,
+ unsigned HighOpcode) const;
+ void expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode,
+ unsigned Size) const;
+ void expandLoadStackGuard(MachineInstr *MI) const;
+
+ MachineInstrBuilder
+ emitGRX32Move(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
+ unsigned LowLowOpcode, unsigned Size, bool KillSrc,
+ bool UndefSrc) const;
+
+ virtual void anchor();
+
+protected:
+ /// Commutes the operands in the given instruction by changing the operands
+ /// order and/or changing the instruction's opcode and/or the immediate value
+ /// operand.
+ ///
+ /// The arguments 'CommuteOpIdx1' and 'CommuteOpIdx2' specify the operands
+ /// to be commuted.
+ ///
+ /// Do not call this method for a non-commutable instruction or
+ /// non-commutable operands.
+ /// Even though the instruction is commutable, the method may still
+ /// fail to commute the operands; a null pointer is returned in such cases.
+ MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
+ unsigned CommuteOpIdx1,
+ unsigned CommuteOpIdx2) const override;
+
+public:
+ explicit SystemZInstrInfo(SystemZSubtarget &STI);
+
+ // Override TargetInstrInfo.
+ unsigned isLoadFromStackSlot(const MachineInstr &MI,
+ int &FrameIndex) const override;
+ unsigned isStoreToStackSlot(const MachineInstr &MI,
+ int &FrameIndex) const override;
+ bool isStackSlotCopy(const MachineInstr &MI, int &DestFrameIndex,
+ int &SrcFrameIndex) const override;
+ bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const override;
+ unsigned removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved = nullptr) const override;
+ unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
+ const DebugLoc &DL,
+ int *BytesAdded = nullptr) const override;
+ bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
+ unsigned &SrcReg2, int &Mask, int &Value) const override;
+ bool optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
+ unsigned SrcReg2, int Mask, int Value,
+ const MachineRegisterInfo *MRI) const override;
+ bool canInsertSelect(const MachineBasicBlock&, ArrayRef<MachineOperand> Cond,
+ unsigned, unsigned, int&, int&, int&) const override;
+ void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ const DebugLoc &DL, unsigned DstReg,
+ ArrayRef<MachineOperand> Cond, unsigned TrueReg,
+ unsigned FalseReg) const override;
+ bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg,
+ MachineRegisterInfo *MRI) const override;
+ bool isPredicable(const MachineInstr &MI) const override;
+ bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
+ unsigned ExtraPredCycles,
+ BranchProbability Probability) const override;
+ bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumCyclesT, unsigned ExtraPredCyclesT,
+ MachineBasicBlock &FMBB,
+ unsigned NumCyclesF, unsigned ExtraPredCyclesF,
+ BranchProbability Probability) const override;
+ bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
+ BranchProbability Probability) const override;
+ bool PredicateInstruction(MachineInstr &MI,
+ ArrayRef<MachineOperand> Pred) const override;
+ void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const override;
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
+ MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineInstr &MI,
+ LiveVariables *LV) const override;
+ MachineInstr *
+ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
+ ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt, int FrameIndex,
+ LiveIntervals *LIS = nullptr) const override;
+ MachineInstr *foldMemoryOperandImpl(
+ MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI,
+ LiveIntervals *LIS = nullptr) const override;
+ bool expandPostRAPseudo(MachineInstr &MBBI) const override;
+ bool reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
+ override;
+
+ // Return the SystemZRegisterInfo, which this class owns.
+ const SystemZRegisterInfo &getRegisterInfo() const { return RI; }
+
+ // Return the size in bytes of MI.
+ unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
+
+ // Return true if MI is a conditional or unconditional branch.
+ // When returning true, set Cond to the mask of condition-code
+ // values on which the instruction will branch, and set Target
+ // to the operand that contains the branch target. This target
+ // can be a register or a basic block.
+ SystemZII::Branch getBranchInfo(const MachineInstr &MI) const;
+
+ // Get the load and store opcodes for a given register class.
+ void getLoadStoreOpcodes(const TargetRegisterClass *RC,
+ unsigned &LoadOpcode, unsigned &StoreOpcode) const;
+
+ // Opcode is the opcode of an instruction that has an address operand,
+ // and the caller wants to perform that instruction's operation on an
+ // address that has displacement Offset. Return the opcode of a suitable
+ // instruction (which might be Opcode itself) or 0 if no such instruction
+ // exists.
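+ // For example (illustrative): L takes an unsigned 12-bit displacement,
+ // so for an Offset of 4096 this would return the long-displacement form
+ // LY (signed 20-bit displacement), and 0 for offsets outside
+ // [-524288, 524287].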
+ unsigned getOpcodeForOffset(unsigned Opcode, int64_t Offset) const;
+
+ // If Opcode is a load instruction that has a LOAD AND TEST form,
+ // return the opcode for the testing form, otherwise return 0.
+ unsigned getLoadAndTest(unsigned Opcode) const;
+
+ // Return true if ROTATE AND ... SELECTED BITS can be used to select bits
+ // Mask of the R2 operand, given that only the low BitSize bits of Mask are
+ // significant. Set Start and End to the I3 and I4 operands if so.
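+ // For example, with BitSize == 32 and Mask == 0x00ff0000, the selected
+ // bits occupy positions 40 to 47 of the underlying 64-bit rotate, so
+ // Start and End would be set to 40 and 47 (illustrative values).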
+ bool isRxSBGMask(uint64_t Mask, unsigned BitSize,
+ unsigned &Start, unsigned &End) const;
+
+ // If Opcode is a COMPARE opcode for which an associated fused COMPARE AND *
+ // operation exists, return the opcode for the latter, otherwise return 0.
+ // MI, if nonnull, is the compare instruction.
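+ // For example, given the opcode of CR and a compare-and-branch fusion
+ // type, this would typically return the opcode of CRJ.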
+ unsigned getFusedCompare(unsigned Opcode,
+ SystemZII::FusedCompareType Type,
+ const MachineInstr *MI = nullptr) const;
+
+ // If Opcode is a LOAD opcode for which an associated LOAD AND TRAP
+ // operation exists, return the opcode for the latter, otherwise return 0.
+ unsigned getLoadAndTrap(unsigned Opcode) const;
+
+ // Emit code before MBBI in MBB to move immediate value Value into
+ // physical register Reg.
+ void loadImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned Reg, uint64_t Value) const;
+
+ // Sometimes the target can tell, even without aliasing information,
+ // that two MIs access different memory addresses. Return true if that
+ // can be proven for MIa and MIb; a false result only means that no such
+ // guarantee could be made, not that the accesses must overlap.
+ bool
+ areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb,
+ AliasAnalysis *AA = nullptr) const override;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZINSTRINFO_H
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
new file mode 100644
index 000000000000..98f66c29ae64
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -0,0 +1,2117 @@
+//===-- SystemZInstrInfo.td - General SystemZ instructions ----*- tblgen-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Stack allocation
+//===----------------------------------------------------------------------===//
+
+let hasNoSchedulingInfo = 1 in {
+ def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2),
+ [(callseq_start timm:$amt1, timm:$amt2)]>;
+ def ADJCALLSTACKUP : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2),
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+let hasSideEffects = 0 in {
+ // Takes as input the value of the stack pointer after a dynamic allocation
+ // has been made. Sets the output to the address of the dynamically-
+ // allocated area itself, skipping the outgoing arguments.
+ //
+ // This expands to an LA or LAY instruction. We restrict the offset
+ // to the range of LA and keep the LAY range in reserve for when
+ // the size of the outgoing arguments is added.
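+ //
+ // For example, with no outgoing stack arguments this might expand to
+ //   la %r2, 160(%r15)
+ // where 160 is the size of the ABI-defined register save area
+ // (illustrative register choice).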
+ def ADJDYNALLOC : Pseudo<(outs GR64:$dst), (ins dynalloc12only:$src),
+ [(set GR64:$dst, dynalloc12only:$src)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Branch instructions
+//===----------------------------------------------------------------------===//
+
+// Conditional branches.
+let isBranch = 1, isTerminator = 1, Uses = [CC] in {
+ // It's easier for LLVM to handle these branches in their raw BRC/BRCL form
+ // with the condition-code mask being the first operand. It seems friendlier
+ // to use mnemonic forms like JE and JLH when writing out the assembly though.
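+ // For example, "brc 8, label" prints as "je label" and "brc 6, label" as
+ // "jlh label", since mask bits 8, 4, 2 and 1 select condition-code values
+ // 0, 1, 2 and 3 respectively.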
+ let isCodeGenOnly = 1 in {
+ // An assembler extended mnemonic for BRC.
+ def BRC : CondBranchRI <"j#", 0xA74, z_br_ccmask>;
+ // An assembler extended mnemonic for BRCL. (The extension is "G"
+ // rather than "L" because "JL" is "Jump if Less".)
+ def BRCL : CondBranchRIL<"jg#", 0xC04>;
+ let isIndirectBranch = 1 in {
+ def BC : CondBranchRX<"b#", 0x47>;
+ def BCR : CondBranchRR<"b#r", 0x07>;
+ }
+ }
+
+ // Allow using the raw forms directly from the assembler (and for
+ // occasional special code-generation needs) as well.
+ def BRCAsm : AsmCondBranchRI <"brc", 0xA74>;
+ def BRCLAsm : AsmCondBranchRIL<"brcl", 0xC04>;
+ let isIndirectBranch = 1 in {
+ def BCAsm : AsmCondBranchRX<"bc", 0x47>;
+ def BCRAsm : AsmCondBranchRR<"bcr", 0x07>;
+ }
+
+ // Define AsmParser extended mnemonics for each general condition-code mask
+ // (integer or floating-point)
+ foreach V = [ "E", "NE", "H", "NH", "L", "NL", "HE", "NHE", "LE", "NLE",
+ "Z", "NZ", "P", "NP", "M", "NM", "LH", "NLH", "O", "NO" ] in {
+ def JAsm#V : FixedCondBranchRI <CV<V>, "j#", 0xA74>;
+ def JGAsm#V : FixedCondBranchRIL<CV<V>, "jg#", 0xC04>;
+ let isIndirectBranch = 1 in {
+ def BAsm#V : FixedCondBranchRX <CV<V>, "b#", 0x47>;
+ def BRAsm#V : FixedCondBranchRR <CV<V>, "b#r", 0x07>;
+ }
+ }
+}
+
+// Unconditional branches. These are in fact simply variants of the
+// conditional branches with the condition mask set to "always".
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
+ def J : FixedCondBranchRI <CondAlways, "j", 0xA74, br>;
+ def JG : FixedCondBranchRIL<CondAlways, "jg", 0xC04>;
+ let isIndirectBranch = 1 in {
+ def B : FixedCondBranchRX<CondAlways, "b", 0x47>;
+ def BR : FixedCondBranchRR<CondAlways, "br", 0x07, brind>;
+ }
+}
+
+// NOPs. These are again variants of the conditional branches,
+// with the condition mask set to "never".
+def NOP : InstAlias<"nop\t$XBD", (BCAsm 0, bdxaddr12only:$XBD), 0>;
+def NOPR : InstAlias<"nopr\t$R", (BCRAsm 0, GR64:$R), 0>;
+
+// Fused compare-and-branch instructions.
+//
+// These instructions do not use or clobber the condition codes.
+// We nevertheless pretend that the relative compare-and-branch
+// instructions clobber CC, so that we can lower them to separate
+// comparisons and BRCLs if the branch ends up being out of range.
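+//
+// For example, an out-of-range "crj %r1, %r2, 8, target" would roughly be
+// rewritten as "cr %r1, %r2" followed by "jge target".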
+let isBranch = 1, isTerminator = 1 in {
+ // As for normal branches, we handle these instructions internally in
+ // their raw CRJ-like form, but use extended mnemonics like CRJE when
+ // writing them out. Using the *Pair multiclasses, we also create the
+ // raw forms.
+ let Defs = [CC] in {
+ defm CRJ : CmpBranchRIEbPair<"crj", 0xEC76, GR32>;
+ defm CGRJ : CmpBranchRIEbPair<"cgrj", 0xEC64, GR64>;
+ defm CIJ : CmpBranchRIEcPair<"cij", 0xEC7E, GR32, imm32sx8>;
+ defm CGIJ : CmpBranchRIEcPair<"cgij", 0xEC7C, GR64, imm64sx8>;
+ defm CLRJ : CmpBranchRIEbPair<"clrj", 0xEC77, GR32>;
+ defm CLGRJ : CmpBranchRIEbPair<"clgrj", 0xEC65, GR64>;
+ defm CLIJ : CmpBranchRIEcPair<"clij", 0xEC7F, GR32, imm32zx8>;
+ defm CLGIJ : CmpBranchRIEcPair<"clgij", 0xEC7D, GR64, imm64zx8>;
+ }
+ let isIndirectBranch = 1 in {
+ defm CRB : CmpBranchRRSPair<"crb", 0xECF6, GR32>;
+ defm CGRB : CmpBranchRRSPair<"cgrb", 0xECE4, GR64>;
+ defm CIB : CmpBranchRISPair<"cib", 0xECFE, GR32, imm32sx8>;
+ defm CGIB : CmpBranchRISPair<"cgib", 0xECFC, GR64, imm64sx8>;
+ defm CLRB : CmpBranchRRSPair<"clrb", 0xECF7, GR32>;
+ defm CLGRB : CmpBranchRRSPair<"clgrb", 0xECE5, GR64>;
+ defm CLIB : CmpBranchRISPair<"clib", 0xECFF, GR32, imm32zx8>;
+ defm CLGIB : CmpBranchRISPair<"clgib", 0xECFD, GR64, imm64zx8>;
+ }
+
+ // Define AsmParser mnemonics for each integer condition-code mask.
+ foreach V = [ "E", "H", "L", "HE", "LE", "LH",
+ "NE", "NH", "NL", "NHE", "NLE", "NLH" ] in {
+ let Defs = [CC] in {
+ def CRJAsm#V : FixedCmpBranchRIEb<ICV<V>, "crj", 0xEC76, GR32>;
+ def CGRJAsm#V : FixedCmpBranchRIEb<ICV<V>, "cgrj", 0xEC64, GR64>;
+ def CIJAsm#V : FixedCmpBranchRIEc<ICV<V>, "cij", 0xEC7E, GR32,
+ imm32sx8>;
+ def CGIJAsm#V : FixedCmpBranchRIEc<ICV<V>, "cgij", 0xEC7C, GR64,
+ imm64sx8>;
+ def CLRJAsm#V : FixedCmpBranchRIEb<ICV<V>, "clrj", 0xEC77, GR32>;
+ def CLGRJAsm#V : FixedCmpBranchRIEb<ICV<V>, "clgrj", 0xEC65, GR64>;
+ def CLIJAsm#V : FixedCmpBranchRIEc<ICV<V>, "clij", 0xEC7F, GR32,
+ imm32zx8>;
+ def CLGIJAsm#V : FixedCmpBranchRIEc<ICV<V>, "clgij", 0xEC7D, GR64,
+ imm64zx8>;
+ }
+ let isIndirectBranch = 1 in {
+ def CRBAsm#V : FixedCmpBranchRRS<ICV<V>, "crb", 0xECF6, GR32>;
+ def CGRBAsm#V : FixedCmpBranchRRS<ICV<V>, "cgrb", 0xECE4, GR64>;
+ def CIBAsm#V : FixedCmpBranchRIS<ICV<V>, "cib", 0xECFE, GR32,
+ imm32sx8>;
+ def CGIBAsm#V : FixedCmpBranchRIS<ICV<V>, "cgib", 0xECFC, GR64,
+ imm64sx8>;
+ def CLRBAsm#V : FixedCmpBranchRRS<ICV<V>, "clrb", 0xECF7, GR32>;
+ def CLGRBAsm#V : FixedCmpBranchRRS<ICV<V>, "clgrb", 0xECE5, GR64>;
+ def CLIBAsm#V : FixedCmpBranchRIS<ICV<V>, "clib", 0xECFF, GR32,
+ imm32zx8>;
+ def CLGIBAsm#V : FixedCmpBranchRIS<ICV<V>, "clgib", 0xECFD, GR64,
+ imm64zx8>;
+ }
+ }
+}
+
+// Decrement a register and branch if it is nonzero. These don't clobber CC,
+// but we might need to split long relative branches into sequences that do.
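+// For example, an out-of-range "brct %r1, target" becomes, roughly,
+// "ahi %r1, -1" followed by "jglh target".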
+let isBranch = 1, isTerminator = 1 in {
+ let Defs = [CC] in {
+ def BRCT : BranchUnaryRI<"brct", 0xA76, GR32>;
+ def BRCTG : BranchUnaryRI<"brctg", 0xA77, GR64>;
+ }
+ // This doesn't need to clobber CC since we never need to split it.
+ def BRCTH : BranchUnaryRIL<"brcth", 0xCC6, GRH32>,
+ Requires<[FeatureHighWord]>;
+
+ def BCT : BranchUnaryRX<"bct", 0x46, GR32>;
+ def BCTR : BranchUnaryRR<"bctr", 0x06, GR32>;
+ def BCTG : BranchUnaryRXY<"bctg", 0xE346, GR64>;
+ def BCTGR : BranchUnaryRRE<"bctgr", 0xB946, GR64>;
+}
+
+let isBranch = 1, isTerminator = 1 in {
+ let Defs = [CC] in {
+ def BRXH : BranchBinaryRSI<"brxh", 0x84, GR32>;
+ def BRXLE : BranchBinaryRSI<"brxle", 0x85, GR32>;
+ def BRXHG : BranchBinaryRIEe<"brxhg", 0xEC44, GR64>;
+ def BRXLG : BranchBinaryRIEe<"brxlg", 0xEC45, GR64>;
+ }
+ def BXH : BranchBinaryRS<"bxh", 0x86, GR32>;
+ def BXLE : BranchBinaryRS<"bxle", 0x87, GR32>;
+ def BXHG : BranchBinaryRSY<"bxhg", 0xEB44, GR64>;
+ def BXLEG : BranchBinaryRSY<"bxleg", 0xEB45, GR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// Trap instructions
+//===----------------------------------------------------------------------===//
+
+// Unconditional trap.
+let hasCtrlDep = 1 in
+ def Trap : Alias<4, (outs), (ins), [(trap)]>;
+
+// Conditional trap.
+let hasCtrlDep = 1, Uses = [CC] in
+ def CondTrap : Alias<4, (outs), (ins cond4:$valid, cond4:$R1), []>;
+
+// Fused compare-and-trap instructions.
+let hasCtrlDep = 1 in {
+ // These patterns work the same way as for compare-and-branch.
+ defm CRT : CmpBranchRRFcPair<"crt", 0xB972, GR32>;
+ defm CGRT : CmpBranchRRFcPair<"cgrt", 0xB960, GR64>;
+ defm CLRT : CmpBranchRRFcPair<"clrt", 0xB973, GR32>;
+ defm CLGRT : CmpBranchRRFcPair<"clgrt", 0xB961, GR64>;
+ defm CIT : CmpBranchRIEaPair<"cit", 0xEC72, GR32, imm32sx16>;
+ defm CGIT : CmpBranchRIEaPair<"cgit", 0xEC70, GR64, imm64sx16>;
+ defm CLFIT : CmpBranchRIEaPair<"clfit", 0xEC73, GR32, imm32zx16>;
+ defm CLGIT : CmpBranchRIEaPair<"clgit", 0xEC71, GR64, imm64zx16>;
+ let Predicates = [FeatureMiscellaneousExtensions] in {
+ defm CLT : CmpBranchRSYbPair<"clt", 0xEB23, GR32>;
+ defm CLGT : CmpBranchRSYbPair<"clgt", 0xEB2B, GR64>;
+ }
+
+ foreach V = [ "E", "H", "L", "HE", "LE", "LH",
+ "NE", "NH", "NL", "NHE", "NLE", "NLH" ] in {
+ def CRTAsm#V : FixedCmpBranchRRFc<ICV<V>, "crt", 0xB972, GR32>;
+ def CGRTAsm#V : FixedCmpBranchRRFc<ICV<V>, "cgrt", 0xB960, GR64>;
+ def CLRTAsm#V : FixedCmpBranchRRFc<ICV<V>, "clrt", 0xB973, GR32>;
+ def CLGRTAsm#V : FixedCmpBranchRRFc<ICV<V>, "clgrt", 0xB961, GR64>;
+ def CITAsm#V : FixedCmpBranchRIEa<ICV<V>, "cit", 0xEC72, GR32,
+ imm32sx16>;
+ def CGITAsm#V : FixedCmpBranchRIEa<ICV<V>, "cgit", 0xEC70, GR64,
+ imm64sx16>;
+ def CLFITAsm#V : FixedCmpBranchRIEa<ICV<V>, "clfit", 0xEC73, GR32,
+ imm32zx16>;
+ def CLGITAsm#V : FixedCmpBranchRIEa<ICV<V>, "clgit", 0xEC71, GR64,
+ imm64zx16>;
+ let Predicates = [FeatureMiscellaneousExtensions] in {
+ def CLTAsm#V : FixedCmpBranchRSYb<ICV<V>, "clt", 0xEB23, GR32>;
+ def CLGTAsm#V : FixedCmpBranchRSYb<ICV<V>, "clgt", 0xEB2B, GR64>;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Call and return instructions
+//===----------------------------------------------------------------------===//
+
+// Define the general form of the call instructions for the asm parser.
+// These instructions don't hard-code %r14 as the return address register.
+let isCall = 1, Defs = [CC] in {
+ def BRAS : CallRI <"bras", 0xA75>;
+ def BRASL : CallRIL<"brasl", 0xC05>;
+ def BAS : CallRX <"bas", 0x4D>;
+ def BASR : CallRR <"basr", 0x0D>;
+}
+
+// Regular calls.
+let isCall = 1, Defs = [R14D, CC] in {
+ def CallBRASL : Alias<6, (outs), (ins pcrel32:$I2, variable_ops),
+ [(z_call pcrel32:$I2)]>;
+ def CallBASR : Alias<2, (outs), (ins ADDR64:$R2, variable_ops),
+ [(z_call ADDR64:$R2)]>;
+}
+
+// TLS calls. These will be lowered into a call to __tls_get_offset,
+// with an extra relocation specifying the TLS symbol.
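+// For example, TLS_GDCALL is expected to be emitted as something like
+//   brasl %r14, __tls_get_offset@PLT:tls_gdcall:sym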
+let isCall = 1, Defs = [R14D, CC] in {
+ def TLS_GDCALL : Alias<6, (outs), (ins tlssym:$I2, variable_ops),
+ [(z_tls_gdcall tglobaltlsaddr:$I2)]>;
+ def TLS_LDCALL : Alias<6, (outs), (ins tlssym:$I2, variable_ops),
+ [(z_tls_ldcall tglobaltlsaddr:$I2)]>;
+}
+
+// Sibling calls. Indirect sibling calls must be via R1, since R2 upwards
+// are argument registers and since branching to R0 is a no-op.
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
+ def CallJG : Alias<6, (outs), (ins pcrel32:$I2),
+ [(z_sibcall pcrel32:$I2)]>;
+ let Uses = [R1D] in
+ def CallBR : Alias<2, (outs), (ins), [(z_sibcall R1D)]>;
+}
+
+// Conditional sibling calls.
+let CCMaskFirst = 1, isCall = 1, isTerminator = 1, isReturn = 1 in {
+ def CallBRCL : Alias<6, (outs), (ins cond4:$valid, cond4:$R1,
+ pcrel32:$I2), []>;
+ let Uses = [R1D] in
+ def CallBCR : Alias<2, (outs), (ins cond4:$valid, cond4:$R1), []>;
+}
+
+// Fused compare and conditional sibling calls.
+let isCall = 1, isTerminator = 1, isReturn = 1, Uses = [R1D] in {
+ def CRBCall : Alias<6, (outs), (ins GR32:$R1, GR32:$R2, cond4:$M3), []>;
+ def CGRBCall : Alias<6, (outs), (ins GR64:$R1, GR64:$R2, cond4:$M3), []>;
+ def CIBCall : Alias<6, (outs), (ins GR32:$R1, imm32sx8:$I2, cond4:$M3), []>;
+ def CGIBCall : Alias<6, (outs), (ins GR64:$R1, imm64sx8:$I2, cond4:$M3), []>;
+ def CLRBCall : Alias<6, (outs), (ins GR32:$R1, GR32:$R2, cond4:$M3), []>;
+ def CLGRBCall : Alias<6, (outs), (ins GR64:$R1, GR64:$R2, cond4:$M3), []>;
+ def CLIBCall : Alias<6, (outs), (ins GR32:$R1, imm32zx8:$I2, cond4:$M3), []>;
+ def CLGIBCall : Alias<6, (outs), (ins GR64:$R1, imm64zx8:$I2, cond4:$M3), []>;
+}
+
+// A return instruction (br %r14).
+let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
+ def Return : Alias<2, (outs), (ins), [(z_retflag)]>;
+
+// A conditional return instruction (bcr <cond>, %r14).
+let isReturn = 1, isTerminator = 1, hasCtrlDep = 1, CCMaskFirst = 1,
+    Uses = [CC] in
+ def CondReturn : Alias<2, (outs), (ins cond4:$valid, cond4:$R1), []>;
+
+// Fused compare and conditional returns.
+let isReturn = 1, isTerminator = 1, hasCtrlDep = 1 in {
+ def CRBReturn : Alias<6, (outs), (ins GR32:$R1, GR32:$R2, cond4:$M3), []>;
+ def CGRBReturn : Alias<6, (outs), (ins GR64:$R1, GR64:$R2, cond4:$M3), []>;
+ def CIBReturn : Alias<6, (outs), (ins GR32:$R1, imm32sx8:$I2, cond4:$M3), []>;
+ def CGIBReturn : Alias<6, (outs), (ins GR64:$R1, imm64sx8:$I2, cond4:$M3), []>;
+ def CLRBReturn : Alias<6, (outs), (ins GR32:$R1, GR32:$R2, cond4:$M3), []>;
+ def CLGRBReturn : Alias<6, (outs), (ins GR64:$R1, GR64:$R2, cond4:$M3), []>;
+ def CLIBReturn : Alias<6, (outs), (ins GR32:$R1, imm32zx8:$I2, cond4:$M3), []>;
+ def CLGIBReturn : Alias<6, (outs), (ins GR64:$R1, imm64zx8:$I2, cond4:$M3), []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Select instructions
+//===----------------------------------------------------------------------===//
+
+def Select32Mux : SelectWrapper<GRX32>, Requires<[FeatureHighWord]>;
+def Select32 : SelectWrapper<GR32>;
+def Select64 : SelectWrapper<GR64>;
+
+// We don't define 32-bit Mux stores if we don't have STOCFH, because in
+// that case the low-only STOC should be used whenever possible.
+defm CondStore8Mux : CondStores<GRX32, nonvolatile_truncstorei8,
+ nonvolatile_anyextloadi8, bdxaddr20only>,
+ Requires<[FeatureHighWord]>;
+defm CondStore16Mux : CondStores<GRX32, nonvolatile_truncstorei16,
+ nonvolatile_anyextloadi16, bdxaddr20only>,
+ Requires<[FeatureHighWord]>;
+defm CondStore32Mux : CondStores<GRX32, nonvolatile_store,
+ nonvolatile_load, bdxaddr20only>,
+ Requires<[FeatureLoadStoreOnCond2]>;
+defm CondStore8 : CondStores<GR32, nonvolatile_truncstorei8,
+ nonvolatile_anyextloadi8, bdxaddr20only>;
+defm CondStore16 : CondStores<GR32, nonvolatile_truncstorei16,
+ nonvolatile_anyextloadi16, bdxaddr20only>;
+defm CondStore32 : CondStores<GR32, nonvolatile_store,
+ nonvolatile_load, bdxaddr20only>;
+
+defm : CondStores64<CondStore8, CondStore8Inv, nonvolatile_truncstorei8,
+ nonvolatile_anyextloadi8, bdxaddr20only>;
+defm : CondStores64<CondStore16, CondStore16Inv, nonvolatile_truncstorei16,
+ nonvolatile_anyextloadi16, bdxaddr20only>;
+defm : CondStores64<CondStore32, CondStore32Inv, nonvolatile_truncstorei32,
+ nonvolatile_anyextloadi32, bdxaddr20only>;
+defm CondStore64 : CondStores<GR64, nonvolatile_store,
+ nonvolatile_load, bdxaddr20only>;
+
+//===----------------------------------------------------------------------===//
+// Move instructions
+//===----------------------------------------------------------------------===//
+
+// Register moves.
+let hasSideEffects = 0 in {
+ // Expands to LR, RISBHG or RISBLG, depending on the choice of registers.
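+ // For example, a low-to-high copy might become
+ //   risbhg %rD, %rS, 0, 159, 32
+ // (rotate the source left 32 bits and insert it into the high word;
+ // illustrative operands).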
+ def LRMux : UnaryRRPseudo<"lr", null_frag, GRX32, GRX32>,
+ Requires<[FeatureHighWord]>;
+ def LR : UnaryRR <"lr", 0x18, null_frag, GR32, GR32>;
+ def LGR : UnaryRRE<"lgr", 0xB904, null_frag, GR64, GR64>;
+}
+let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in {
+ def LTR : UnaryRR <"ltr", 0x12, null_frag, GR32, GR32>;
+ def LTGR : UnaryRRE<"ltgr", 0xB902, null_frag, GR64, GR64>;
+}
+
+// Immediate moves.
+let hasSideEffects = 0, isAsCheapAsAMove = 1, isMoveImm = 1,
+ isReMaterializable = 1 in {
+ // 16-bit sign-extended immediates. LHIMux expands to LHI or IIHF,
+ // depending on the choice of register.
+ def LHIMux : UnaryRIPseudo<bitconvert, GRX32, imm32sx16>,
+ Requires<[FeatureHighWord]>;
+ def LHI : UnaryRI<"lhi", 0xA78, bitconvert, GR32, imm32sx16>;
+ def LGHI : UnaryRI<"lghi", 0xA79, bitconvert, GR64, imm64sx16>;
+
+ // Other 16-bit immediates.
+ def LLILL : UnaryRI<"llill", 0xA5F, bitconvert, GR64, imm64ll16>;
+ def LLILH : UnaryRI<"llilh", 0xA5E, bitconvert, GR64, imm64lh16>;
+ def LLIHL : UnaryRI<"llihl", 0xA5D, bitconvert, GR64, imm64hl16>;
+ def LLIHH : UnaryRI<"llihh", 0xA5C, bitconvert, GR64, imm64hh16>;
+
+ // 32-bit immediates.
+ def LGFI : UnaryRIL<"lgfi", 0xC01, bitconvert, GR64, imm64sx32>;
+ def LLILF : UnaryRIL<"llilf", 0xC0F, bitconvert, GR64, imm64lf32>;
+ def LLIHF : UnaryRIL<"llihf", 0xC0E, bitconvert, GR64, imm64hf32>;
+}
+
+// Register loads.
+let canFoldAsLoad = 1, SimpleBDXLoad = 1 in {
+ // Expands to L, LY or LFH, depending on the choice of register.
+ def LMux : UnaryRXYPseudo<"l", load, GRX32, 4>,
+ Requires<[FeatureHighWord]>;
+ defm L : UnaryRXPair<"l", 0x58, 0xE358, load, GR32, 4>;
+ def LFH : UnaryRXY<"lfh", 0xE3CA, load, GRH32, 4>,
+ Requires<[FeatureHighWord]>;
+ def LG : UnaryRXY<"lg", 0xE304, load, GR64, 8>;
+
+ // These instructions are split after register allocation, so we don't
+ // want a custom inserter.
+ let Has20BitOffset = 1, HasIndex = 1, Is128Bit = 1 in {
+ def L128 : Pseudo<(outs GR128:$dst), (ins bdxaddr20only128:$src),
+ [(set GR128:$dst, (load bdxaddr20only128:$src))]>;
+ }
+}
+let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in {
+ def LT : UnaryRXY<"lt", 0xE312, load, GR32, 4>;
+ def LTG : UnaryRXY<"ltg", 0xE302, load, GR64, 8>;
+}
+
+let canFoldAsLoad = 1 in {
+ def LRL : UnaryRILPC<"lrl", 0xC4D, aligned_load, GR32>;
+ def LGRL : UnaryRILPC<"lgrl", 0xC48, aligned_load, GR64>;
+}
+
+// Load and zero rightmost byte.
+let Predicates = [FeatureLoadAndZeroRightmostByte] in {
+ def LZRF : UnaryRXY<"lzrf", 0xE33B, null_frag, GR32, 4>;
+ def LZRG : UnaryRXY<"lzrg", 0xE32A, null_frag, GR64, 8>;
+ def : Pat<(and (i32 (load bdxaddr20only:$src)), 0xffffff00),
+ (LZRF bdxaddr20only:$src)>;
+ def : Pat<(and (i64 (load bdxaddr20only:$src)), 0xffffffffffffff00),
+ (LZRG bdxaddr20only:$src)>;
+}
+
+// Load and trap.
+let Predicates = [FeatureLoadAndTrap] in {
+ def LAT : UnaryRXY<"lat", 0xE39F, null_frag, GR32, 4>;
+ def LFHAT : UnaryRXY<"lfhat", 0xE3C8, null_frag, GRH32, 4>;
+ def LGAT : UnaryRXY<"lgat", 0xE385, null_frag, GR64, 8>;
+}
+
+// Register stores.
+let SimpleBDXStore = 1 in {
+ // Expands to ST, STY or STFH, depending on the choice of register.
+ def STMux : StoreRXYPseudo<store, GRX32, 4>,
+ Requires<[FeatureHighWord]>;
+ defm ST : StoreRXPair<"st", 0x50, 0xE350, store, GR32, 4>;
+ def STFH : StoreRXY<"stfh", 0xE3CB, store, GRH32, 4>,
+ Requires<[FeatureHighWord]>;
+ def STG : StoreRXY<"stg", 0xE324, store, GR64, 8>;
+
+ // These instructions are split after register allocation, so we don't
+ // want a custom inserter.
+ let Has20BitOffset = 1, HasIndex = 1, Is128Bit = 1 in {
+ def ST128 : Pseudo<(outs), (ins GR128:$src, bdxaddr20only128:$dst),
+ [(store GR128:$src, bdxaddr20only128:$dst)]>;
+ }
+}
+def STRL : StoreRILPC<"strl", 0xC4F, aligned_store, GR32>;
+def STGRL : StoreRILPC<"stgrl", 0xC4B, aligned_store, GR64>;
+
+// 8-bit immediate stores to 8-bit fields.
+defm MVI : StoreSIPair<"mvi", 0x92, 0xEB52, truncstorei8, imm32zx8trunc>;
+
+// 16-bit immediate stores to 16-, 32- or 64-bit fields.
+def MVHHI : StoreSIL<"mvhhi", 0xE544, truncstorei16, imm32sx16trunc>;
+def MVHI : StoreSIL<"mvhi", 0xE54C, store, imm32sx16>;
+def MVGHI : StoreSIL<"mvghi", 0xE548, store, imm64sx16>;
+
+// Memory-to-memory moves.
+let mayLoad = 1, mayStore = 1 in
+ defm MVC : MemorySS<"mvc", 0xD2, z_mvc, z_mvc_loop>;
+let mayLoad = 1, mayStore = 1, Defs = [CC] in {
+ def MVCL : SideEffectBinaryMemMemRR<"mvcl", 0x0E, GR128, GR128>;
+ def MVCLE : SideEffectTernaryMemMemRS<"mvcle", 0xA8, GR128, GR128>;
+ def MVCLU : SideEffectTernaryMemMemRSY<"mvclu", 0xEB8E, GR128, GR128>;
+}
+
+// String moves.
+let mayLoad = 1, mayStore = 1, Defs = [CC] in
+ defm MVST : StringRRE<"mvst", 0xB255, z_stpcpy>;
+
+//===----------------------------------------------------------------------===//
+// Conditional move instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureLoadStoreOnCond2], Uses = [CC] in {
+ // Load immediate on condition. Matched via DAG pattern and created
+ // by the PeepholeOptimizer via FoldImmediate.
+ let hasSideEffects = 0 in {
+ // Expands to LOCHI or LOCHHI, depending on the choice of register.
+ def LOCHIMux : CondBinaryRIEPseudo<GRX32, imm32sx16>;
+ defm LOCHHI : CondBinaryRIEPair<"lochhi", 0xEC4E, GRH32, imm32sx16>;
+ defm LOCHI : CondBinaryRIEPair<"lochi", 0xEC42, GR32, imm32sx16>;
+ defm LOCGHI : CondBinaryRIEPair<"locghi", 0xEC46, GR64, imm64sx16>;
+ }
+
+ // Move register on condition. Expanded from Select* pseudos and
+ // created by early if-conversion.
+ let hasSideEffects = 0, isCommutable = 1 in {
+ // Expands to LOCR or LOCFHR or a branch-and-move sequence,
+ // depending on the choice of registers.
+ def LOCRMux : CondBinaryRRFPseudo<GRX32, GRX32>;
+ defm LOCFHR : CondBinaryRRFPair<"locfhr", 0xB9E0, GRH32, GRH32>;
+ }
+
+ // Load on condition. Matched via DAG pattern.
+ // Expands to LOC or LOCFH, depending on the choice of register.
+ def LOCMux : CondUnaryRSYPseudo<nonvolatile_load, GRX32, 4>;
+ defm LOCFH : CondUnaryRSYPair<"locfh", 0xEBE0, nonvolatile_load, GRH32, 4>;
+
+ // Store on condition. Expanded from CondStore* pseudos.
+ // Expands to STOC or STOCFH, depending on the choice of register.
+ def STOCMux : CondStoreRSYPseudo<GRX32, 4>;
+ defm STOCFH : CondStoreRSYPair<"stocfh", 0xEBE1, GRH32, 4>;
+
+ // Define AsmParser extended mnemonics for each general condition-code mask.
+ foreach V = [ "E", "NE", "H", "NH", "L", "NL", "HE", "NHE", "LE", "NLE",
+ "Z", "NZ", "P", "NP", "M", "NM", "LH", "NLH", "O", "NO" ] in {
+ def LOCHIAsm#V : FixedCondBinaryRIE<CV<V>, "lochi", 0xEC42, GR32,
+ imm32sx16>;
+ def LOCGHIAsm#V : FixedCondBinaryRIE<CV<V>, "locghi", 0xEC46, GR64,
+ imm64sx16>;
+ def LOCHHIAsm#V : FixedCondBinaryRIE<CV<V>, "lochhi", 0xEC4E, GRH32,
+ imm32sx16>;
+ def LOCFHRAsm#V : FixedCondBinaryRRF<CV<V>, "locfhr", 0xB9E0, GRH32, GRH32>;
+ def LOCFHAsm#V : FixedCondUnaryRSY<CV<V>, "locfh", 0xEBE0, GRH32, 4>;
+ def STOCFHAsm#V : FixedCondStoreRSY<CV<V>, "stocfh", 0xEBE1, GRH32, 4>;
+ }
+}
+
+let Predicates = [FeatureLoadStoreOnCond], Uses = [CC] in {
+ // Move register on condition. Expanded from Select* pseudos and
+ // created by early if-conversion.
+ let hasSideEffects = 0, isCommutable = 1 in {
+ defm LOCR : CondBinaryRRFPair<"locr", 0xB9F2, GR32, GR32>;
+ defm LOCGR : CondBinaryRRFPair<"locgr", 0xB9E2, GR64, GR64>;
+ }
+
+ // Load on condition. Matched via DAG pattern.
+ defm LOC : CondUnaryRSYPair<"loc", 0xEBF2, nonvolatile_load, GR32, 4>;
+ defm LOCG : CondUnaryRSYPair<"locg", 0xEBE2, nonvolatile_load, GR64, 8>;
+
+ // Store on condition. Expanded from CondStore* pseudos.
+ defm STOC : CondStoreRSYPair<"stoc", 0xEBF3, GR32, 4>;
+ defm STOCG : CondStoreRSYPair<"stocg", 0xEBE3, GR64, 8>;
+
+ // Define AsmParser extended mnemonics for each general condition-code mask.
+ foreach V = [ "E", "NE", "H", "NH", "L", "NL", "HE", "NHE", "LE", "NLE",
+ "Z", "NZ", "P", "NP", "M", "NM", "LH", "NLH", "O", "NO" ] in {
+ def LOCRAsm#V : FixedCondBinaryRRF<CV<V>, "locr", 0xB9F2, GR32, GR32>;
+ def LOCGRAsm#V : FixedCondBinaryRRF<CV<V>, "locgr", 0xB9E2, GR64, GR64>;
+ def LOCAsm#V : FixedCondUnaryRSY<CV<V>, "loc", 0xEBF2, GR32, 4>;
+ def LOCGAsm#V : FixedCondUnaryRSY<CV<V>, "locg", 0xEBE2, GR64, 8>;
+ def STOCAsm#V : FixedCondStoreRSY<CV<V>, "stoc", 0xEBF3, GR32, 4>;
+ def STOCGAsm#V : FixedCondStoreRSY<CV<V>, "stocg", 0xEBE3, GR64, 8>;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Sign extensions
+//===----------------------------------------------------------------------===//
+//
+// Note that putting these before the zero extensions means that we will
+// prefer them for anyextload*. There's not really much to choose between
+// the two either way, but sign-extending loads have a short LH and a long
+// LHY, while zero-extending loads have only the long LLH.
+//
+//===----------------------------------------------------------------------===//
+
+// 32-bit extensions from registers.
+let hasSideEffects = 0 in {
+ def LBR : UnaryRRE<"lbr", 0xB926, sext8, GR32, GR32>;
+ def LHR : UnaryRRE<"lhr", 0xB927, sext16, GR32, GR32>;
+}
+
+// 64-bit extensions from registers.
+let hasSideEffects = 0 in {
+ def LGBR : UnaryRRE<"lgbr", 0xB906, sext8, GR64, GR64>;
+ def LGHR : UnaryRRE<"lghr", 0xB907, sext16, GR64, GR64>;
+ def LGFR : UnaryRRE<"lgfr", 0xB914, sext32, GR64, GR32>;
+}
+let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in
+ def LTGFR : UnaryRRE<"ltgfr", 0xB912, null_frag, GR64, GR32>;
+
+// Match 32-to-64-bit sign extensions in which the source is already
+// in a 64-bit register.
+def : Pat<(sext_inreg GR64:$src, i32),
+ (LGFR (EXTRACT_SUBREG GR64:$src, subreg_l32))>;
+
+// 32-bit extensions from 8-bit memory. LBMux expands to LB or LBH,
+// depending on the choice of register.
+def LBMux : UnaryRXYPseudo<"lb", asextloadi8, GRX32, 1>,
+ Requires<[FeatureHighWord]>;
+def LB : UnaryRXY<"lb", 0xE376, asextloadi8, GR32, 1>;
+def LBH : UnaryRXY<"lbh", 0xE3C0, asextloadi8, GRH32, 1>,
+ Requires<[FeatureHighWord]>;
+
+// 32-bit extensions from 16-bit memory. LHMux expands to LH or LHH,
+// depending on the choice of register.
+def LHMux : UnaryRXYPseudo<"lh", asextloadi16, GRX32, 2>,
+ Requires<[FeatureHighWord]>;
+defm LH : UnaryRXPair<"lh", 0x48, 0xE378, asextloadi16, GR32, 2>;
+def LHH : UnaryRXY<"lhh", 0xE3C4, asextloadi16, GRH32, 2>,
+ Requires<[FeatureHighWord]>;
+def LHRL : UnaryRILPC<"lhrl", 0xC45, aligned_asextloadi16, GR32>;
+
+// 64-bit extensions from memory.
+def LGB : UnaryRXY<"lgb", 0xE377, asextloadi8, GR64, 1>;
+def LGH : UnaryRXY<"lgh", 0xE315, asextloadi16, GR64, 2>;
+def LGF : UnaryRXY<"lgf", 0xE314, asextloadi32, GR64, 4>;
+def LGHRL : UnaryRILPC<"lghrl", 0xC44, aligned_asextloadi16, GR64>;
+def LGFRL : UnaryRILPC<"lgfrl", 0xC4C, aligned_asextloadi32, GR64>;
+let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in
+ def LTGF : UnaryRXY<"ltgf", 0xE332, asextloadi32, GR64, 4>;
+
+//===----------------------------------------------------------------------===//
+// Zero extensions
+//===----------------------------------------------------------------------===//
+
+// 32-bit extensions from registers.
+let hasSideEffects = 0 in {
+ // Expands to LLCR or RISB[LH]G, depending on the choice of registers.
+ def LLCRMux : UnaryRRPseudo<"llcr", zext8, GRX32, GRX32>,
+ Requires<[FeatureHighWord]>;
+ def LLCR : UnaryRRE<"llcr", 0xB994, zext8, GR32, GR32>;
+ // Expands to LLHR or RISB[LH]G, depending on the choice of registers.
+ def LLHRMux : UnaryRRPseudo<"llhr", zext16, GRX32, GRX32>,
+ Requires<[FeatureHighWord]>;
+ def LLHR : UnaryRRE<"llhr", 0xB995, zext16, GR32, GR32>;
+}
+
+// 64-bit extensions from registers.
+let hasSideEffects = 0 in {
+ def LLGCR : UnaryRRE<"llgcr", 0xB984, zext8, GR64, GR64>;
+ def LLGHR : UnaryRRE<"llghr", 0xB985, zext16, GR64, GR64>;
+ def LLGFR : UnaryRRE<"llgfr", 0xB916, zext32, GR64, GR32>;
+}
+
+// Match 32-to-64-bit zero extensions in which the source is already
+// in a 64-bit register.
+def : Pat<(and GR64:$src, 0xffffffff),
+ (LLGFR (EXTRACT_SUBREG GR64:$src, subreg_l32))>;
+
+// 32-bit extensions from 8-bit memory. LLCMux expands to LLC or LLCH,
+// depending on the choice of register.
+def LLCMux : UnaryRXYPseudo<"llc", azextloadi8, GRX32, 1>,
+ Requires<[FeatureHighWord]>;
+def LLC : UnaryRXY<"llc", 0xE394, azextloadi8, GR32, 1>;
+def LLCH : UnaryRXY<"llch", 0xE3C2, azextloadi8, GRH32, 1>,
+ Requires<[FeatureHighWord]>;
+
+// 32-bit extensions from 16-bit memory. LLHMux expands to LLH or LLHH,
+// depending on the choice of register.
+def LLHMux : UnaryRXYPseudo<"llh", azextloadi16, GRX32, 2>,
+ Requires<[FeatureHighWord]>;
+def LLH : UnaryRXY<"llh", 0xE395, azextloadi16, GR32, 2>;
+def LLHH : UnaryRXY<"llhh", 0xE3C6, azextloadi16, GRH32, 2>,
+ Requires<[FeatureHighWord]>;
+def LLHRL : UnaryRILPC<"llhrl", 0xC42, aligned_azextloadi16, GR32>;
+
+// 64-bit extensions from memory.
+def LLGC : UnaryRXY<"llgc", 0xE390, azextloadi8, GR64, 1>;
+def LLGH : UnaryRXY<"llgh", 0xE391, azextloadi16, GR64, 2>;
+def LLGF : UnaryRXY<"llgf", 0xE316, azextloadi32, GR64, 4>;
+def LLGHRL : UnaryRILPC<"llghrl", 0xC46, aligned_azextloadi16, GR64>;
+def LLGFRL : UnaryRILPC<"llgfrl", 0xC4E, aligned_azextloadi32, GR64>;
+
+// 31-to-64-bit zero extensions.
+def LLGTR : UnaryRRE<"llgtr", 0xB917, null_frag, GR64, GR64>;
+def LLGT : UnaryRXY<"llgt", 0xE317, null_frag, GR64, 4>;
+def : Pat<(and GR64:$src, 0x7fffffff),
+ (LLGTR GR64:$src)>;
+def : Pat<(and (i64 (azextloadi32 bdxaddr20only:$src)), 0x7fffffff),
+ (LLGT bdxaddr20only:$src)>;
+
+// Load and zero rightmost byte.
+let Predicates = [FeatureLoadAndZeroRightmostByte] in {
+ def LLZRGF : UnaryRXY<"llzrgf", 0xE33A, null_frag, GR64, 4>;
+ def : Pat<(and (i64 (azextloadi32 bdxaddr20only:$src)), 0xffffff00),
+ (LLZRGF bdxaddr20only:$src)>;
+}
+
+// Load and trap.
+let Predicates = [FeatureLoadAndTrap] in {
+ def LLGFAT : UnaryRXY<"llgfat", 0xE39D, null_frag, GR64, 4>;
+ def LLGTAT : UnaryRXY<"llgtat", 0xE39C, null_frag, GR64, 4>;
+}
+
+//===----------------------------------------------------------------------===//
+// Truncations
+//===----------------------------------------------------------------------===//
+
+// Truncations of 64-bit registers to 32-bit registers.
+def : Pat<(i32 (trunc GR64:$src)),
+ (EXTRACT_SUBREG GR64:$src, subreg_l32)>;
+
+// Truncations of 32-bit registers to 8-bit memory. STCMux expands to
+// STC, STCY or STCH, depending on the choice of register.
+def STCMux : StoreRXYPseudo<truncstorei8, GRX32, 1>,
+ Requires<[FeatureHighWord]>;
+defm STC : StoreRXPair<"stc", 0x42, 0xE372, truncstorei8, GR32, 1>;
+def STCH : StoreRXY<"stch", 0xE3C3, truncstorei8, GRH32, 1>,
+ Requires<[FeatureHighWord]>;
+
+// Truncations of 32-bit registers to 16-bit memory. STHMux expands to
+// STH, STHY or STHH, depending on the choice of register.
+def STHMux : StoreRXYPseudo<truncstorei16, GRX32, 2>,
+ Requires<[FeatureHighWord]>;
+defm STH : StoreRXPair<"sth", 0x40, 0xE370, truncstorei16, GR32, 2>;
+def STHH : StoreRXY<"sthh", 0xE3C7, truncstorei16, GRH32, 2>,
+ Requires<[FeatureHighWord]>;
+def STHRL : StoreRILPC<"sthrl", 0xC47, aligned_truncstorei16, GR32>;
+
+// Truncations of 64-bit registers to memory.
+defm : StoreGR64Pair<STC, STCY, truncstorei8>;
+defm : StoreGR64Pair<STH, STHY, truncstorei16>;
+def : StoreGR64PC<STHRL, aligned_truncstorei16>;
+defm : StoreGR64Pair<ST, STY, truncstorei32>;
+def : StoreGR64PC<STRL, aligned_truncstorei32>;
+
+// Store characters under mask -- not (yet) used for codegen.
+defm STCM : StoreBinaryRSPair<"stcm", 0xBE, 0xEB2D, GR32, 0>;
+def STCMH : StoreBinaryRSY<"stcmh", 0xEB2C, GRH32, 0>;
+
+//===----------------------------------------------------------------------===//
+// Multi-register moves
+//===----------------------------------------------------------------------===//
+
+// Multi-register loads.
+defm LM : LoadMultipleRSPair<"lm", 0x98, 0xEB98, GR32>;
+def LMG : LoadMultipleRSY<"lmg", 0xEB04, GR64>;
+def LMH : LoadMultipleRSY<"lmh", 0xEB96, GRH32>;
+def LMD : LoadMultipleSSe<"lmd", 0xEF, GR64>;
+
+// Multi-register stores.
+defm STM : StoreMultipleRSPair<"stm", 0x90, 0xEB90, GR32>;
+def STMG : StoreMultipleRSY<"stmg", 0xEB24, GR64>;
+def STMH : StoreMultipleRSY<"stmh", 0xEB26, GRH32>;
+
+//===----------------------------------------------------------------------===//
+// Byte swaps
+//===----------------------------------------------------------------------===//
+
+// Byte-swapping register moves.
+let hasSideEffects = 0 in {
+ def LRVR : UnaryRRE<"lrvr", 0xB91F, bswap, GR32, GR32>;
+ def LRVGR : UnaryRRE<"lrvgr", 0xB90F, bswap, GR64, GR64>;
+}
+
+// Byte-swapping loads. Unlike normal loads, these instructions are
+// allowed to access storage more than once.
+def LRVH : UnaryRXY<"lrvh", 0xE31F, z_lrvh, GR32, 2>;
+def LRV : UnaryRXY<"lrv", 0xE31E, z_lrv, GR32, 4>;
+def LRVG : UnaryRXY<"lrvg", 0xE30F, z_lrvg, GR64, 8>;
+
+// Likewise byte-swapping stores.
+def STRVH : StoreRXY<"strvh", 0xE33F, z_strvh, GR32, 2>;
+def STRV : StoreRXY<"strv", 0xE33E, z_strv, GR32, 4>;
+def STRVG : StoreRXY<"strvg", 0xE32F, z_strvg, GR64, 8>;
+
+// Byte-swapping memory-to-memory moves.
+let mayLoad = 1, mayStore = 1 in
+ def MVCIN : SideEffectBinarySSa<"mvcin", 0xE8>;
+
+//===----------------------------------------------------------------------===//
+// Load address instructions
+//===----------------------------------------------------------------------===//
+
+// Load BDX-style addresses.
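+// For example, "la %r1, 8(%r2,%r3)" computes %r2 + %r3 + 8 without
+// accessing memory.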
+let hasSideEffects = 0, isAsCheapAsAMove = 1, isReMaterializable = 1 in
+ defm LA : LoadAddressRXPair<"la", 0x41, 0xE371, bitconvert>;
+
+// Load a PC-relative address. There's no version of this instruction
+// with a 16-bit offset, so there's no relaxation.
+let hasSideEffects = 0, isAsCheapAsAMove = 1, isMoveImm = 1,
+ isReMaterializable = 1 in
+ def LARL : LoadAddressRIL<"larl", 0xC00, bitconvert>;
+
+// Load the Global Offset Table address. This will be lowered into a
+// larl $R1, _GLOBAL_OFFSET_TABLE_
+// instruction.
+def GOT : Alias<6, (outs GR64:$R1), (ins),
+ [(set GR64:$R1, (global_offset_table))]>;
+
+//===----------------------------------------------------------------------===//
+// Absolute value and negation
+//===----------------------------------------------------------------------===//
+
+let Defs = [CC] in {
+ let CCValues = 0xF, CompareZeroCCMask = 0x8 in {
+ def LPR : UnaryRR <"lpr", 0x10, z_iabs, GR32, GR32>;
+ def LPGR : UnaryRRE<"lpgr", 0xB900, z_iabs, GR64, GR64>;
+ }
+ let CCValues = 0xE, CompareZeroCCMask = 0xE in
+ def LPGFR : UnaryRRE<"lpgfr", 0xB910, null_frag, GR64, GR32>;
+}
+def : Pat<(z_iabs32 GR32:$src), (LPR GR32:$src)>;
+def : Pat<(z_iabs64 GR64:$src), (LPGR GR64:$src)>;
+defm : SXU<z_iabs, LPGFR>;
+defm : SXU<z_iabs64, LPGFR>;
+
+let Defs = [CC] in {
+ let CCValues = 0xF, CompareZeroCCMask = 0x8 in {
+ def LNR : UnaryRR <"lnr", 0x11, z_inegabs, GR32, GR32>;
+ def LNGR : UnaryRRE<"lngr", 0xB901, z_inegabs, GR64, GR64>;
+ }
+ let CCValues = 0xE, CompareZeroCCMask = 0xE in
+ def LNGFR : UnaryRRE<"lngfr", 0xB911, null_frag, GR64, GR32>;
+}
+def : Pat<(z_inegabs32 GR32:$src), (LNR GR32:$src)>;
+def : Pat<(z_inegabs64 GR64:$src), (LNGR GR64:$src)>;
+defm : SXU<z_inegabs, LNGFR>;
+defm : SXU<z_inegabs64, LNGFR>;
+
+let Defs = [CC] in {
+ let CCValues = 0xF, CompareZeroCCMask = 0x8 in {
+ def LCR : UnaryRR <"lcr", 0x13, ineg, GR32, GR32>;
+ def LCGR : UnaryRRE<"lcgr", 0xB903, ineg, GR64, GR64>;
+ }
+ let CCValues = 0xE, CompareZeroCCMask = 0xE in
+ def LCGFR : UnaryRRE<"lcgfr", 0xB913, null_frag, GR64, GR32>;
+}
+defm : SXU<ineg, LCGFR>;
+
+//===----------------------------------------------------------------------===//
+// Insertion
+//===----------------------------------------------------------------------===//
+
+let isCodeGenOnly = 1 in
+ defm IC32 : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR32, azextloadi8, 1>;
+defm IC : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR64, azextloadi8, 1>;
+
+defm : InsertMem<"inserti8", IC32, GR32, azextloadi8, bdxaddr12pair>;
+defm : InsertMem<"inserti8", IC32Y, GR32, azextloadi8, bdxaddr20pair>;
+
+defm : InsertMem<"inserti8", IC, GR64, azextloadi8, bdxaddr12pair>;
+defm : InsertMem<"inserti8", ICY, GR64, azextloadi8, bdxaddr20pair>;
+
+// Insert characters under mask -- not (yet) used for codegen.
+let Defs = [CC] in {
+ defm ICM : TernaryRSPair<"icm", 0xBF, 0xEB81, GR32, 0>;
+ def ICMH : TernaryRSY<"icmh", 0xEB80, GRH32, 0>;
+}
+
+// Insertions of a 16-bit immediate, leaving other bits unaffected.
+// We don't have or_as_insert equivalents of these operations because
+// OI is available instead.
+//
+// IIxMux expands to II[LH]x, depending on the choice of register.
+def IILMux : BinaryRIPseudo<insertll, GRX32, imm32ll16>,
+ Requires<[FeatureHighWord]>;
+def IIHMux : BinaryRIPseudo<insertlh, GRX32, imm32lh16>,
+ Requires<[FeatureHighWord]>;
+def IILL : BinaryRI<"iill", 0xA53, insertll, GR32, imm32ll16>;
+def IILH : BinaryRI<"iilh", 0xA52, insertlh, GR32, imm32lh16>;
+def IIHL : BinaryRI<"iihl", 0xA51, insertll, GRH32, imm32ll16>;
+def IIHH : BinaryRI<"iihh", 0xA50, insertlh, GRH32, imm32lh16>;
+def IILL64 : BinaryAliasRI<insertll, GR64, imm64ll16>;
+def IILH64 : BinaryAliasRI<insertlh, GR64, imm64lh16>;
+def IIHL64 : BinaryAliasRI<inserthl, GR64, imm64hl16>;
+def IIHH64 : BinaryAliasRI<inserthh, GR64, imm64hh16>;
+
+// ...likewise for 32-bit immediates. For GR32s this is a general
+// full-width move. (We use IILF rather than something like LLILF
+// for 32-bit moves because IILF leaves the upper 32 bits of the
+// GR64 unchanged.)
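+// For example, loading the 32-bit constant 0x12345678 into a GR32 is
+// expected to use "iilf %r1, 305419896" (illustrative register).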
+let isAsCheapAsAMove = 1, isMoveImm = 1, isReMaterializable = 1 in {
+ def IIFMux : UnaryRIPseudo<bitconvert, GRX32, uimm32>,
+ Requires<[FeatureHighWord]>;
+ def IILF : UnaryRIL<"iilf", 0xC09, bitconvert, GR32, uimm32>;
+ def IIHF : UnaryRIL<"iihf", 0xC08, bitconvert, GRH32, uimm32>;
+}
+def IILF64 : BinaryAliasRIL<insertlf, GR64, imm64lf32>;
+def IIHF64 : BinaryAliasRIL<inserthf, GR64, imm64hf32>;
+
+// An alternative model of inserthf, with the first operand being
+// a zero-extended value.
+def : Pat<(or (zext32 GR32:$src), imm64hf32:$imm),
+ (IIHF64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32),
+ imm64hf32:$imm)>;
+
+//===----------------------------------------------------------------------===//
+// Addition
+//===----------------------------------------------------------------------===//
+
+// Plain addition.
+let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in {
+ // Addition of a register.
+ let isCommutable = 1 in {
+ defm AR : BinaryRRAndK<"ar", 0x1A, 0xB9F8, add, GR32, GR32>;
+ defm AGR : BinaryRREAndK<"agr", 0xB908, 0xB9E8, add, GR64, GR64>;
+ }
+ def AGFR : BinaryRRE<"agfr", 0xB918, null_frag, GR64, GR32>;
+
+ // Addition to a high register.
+ def AHHHR : BinaryRRFa<"ahhhr", 0xB9C8, null_frag, GRH32, GRH32, GRH32>,
+ Requires<[FeatureHighWord]>;
+ def AHHLR : BinaryRRFa<"ahhlr", 0xB9D8, null_frag, GRH32, GRH32, GR32>,
+ Requires<[FeatureHighWord]>;
+
+ // Addition of signed 16-bit immediates.
+ defm AHIMux : BinaryRIAndKPseudo<"ahimux", add, GRX32, imm32sx16>;
+ defm AHI : BinaryRIAndK<"ahi", 0xA7A, 0xECD8, add, GR32, imm32sx16>;
+ defm AGHI : BinaryRIAndK<"aghi", 0xA7B, 0xECD9, add, GR64, imm64sx16>;
+
+ // Addition of signed 32-bit immediates.
+ def AFIMux : BinaryRIPseudo<add, GRX32, simm32>,
+ Requires<[FeatureHighWord]>;
+ def AFI : BinaryRIL<"afi", 0xC29, add, GR32, simm32>;
+ def AIH : BinaryRIL<"aih", 0xCC8, add, GRH32, simm32>,
+ Requires<[FeatureHighWord]>;
+ def AGFI : BinaryRIL<"agfi", 0xC28, add, GR64, imm64sx32>;
+
+ // Addition of memory.
+ defm AH : BinaryRXPair<"ah", 0x4A, 0xE37A, add, GR32, asextloadi16, 2>;
+ defm A : BinaryRXPair<"a", 0x5A, 0xE35A, add, GR32, load, 4>;
+ def AGF : BinaryRXY<"agf", 0xE318, add, GR64, asextloadi32, 4>;
+ def AG : BinaryRXY<"ag", 0xE308, add, GR64, load, 8>;
+
+ // Addition to memory.
+ def ASI : BinarySIY<"asi", 0xEB6A, add, imm32sx8>;
+ def AGSI : BinarySIY<"agsi", 0xEB7A, add, imm64sx8>;
+}
+defm : SXB<add, GR64, AGFR>;
+
+// Addition producing a carry.
+let Defs = [CC] in {
+ // Addition of a register.
+ let isCommutable = 1 in {
+ defm ALR : BinaryRRAndK<"alr", 0x1E, 0xB9FA, addc, GR32, GR32>;
+ defm ALGR : BinaryRREAndK<"algr", 0xB90A, 0xB9EA, addc, GR64, GR64>;
+ }
+ def ALGFR : BinaryRRE<"algfr", 0xB91A, null_frag, GR64, GR32>;
+
+ // Addition to a high register.
+ def ALHHHR : BinaryRRFa<"alhhhr", 0xB9CA, null_frag, GRH32, GRH32, GRH32>,
+ Requires<[FeatureHighWord]>;
+ def ALHHLR : BinaryRRFa<"alhhlr", 0xB9DA, null_frag, GRH32, GRH32, GR32>,
+ Requires<[FeatureHighWord]>;
+
+ // Addition of signed 16-bit immediates.
+ def ALHSIK : BinaryRIE<"alhsik", 0xECDA, addc, GR32, imm32sx16>,
+ Requires<[FeatureDistinctOps]>;
+ def ALGHSIK : BinaryRIE<"alghsik", 0xECDB, addc, GR64, imm64sx16>,
+ Requires<[FeatureDistinctOps]>;
+
+ // Addition of unsigned 32-bit immediates.
+ def ALFI : BinaryRIL<"alfi", 0xC2B, addc, GR32, uimm32>;
+ def ALGFI : BinaryRIL<"algfi", 0xC2A, addc, GR64, imm64zx32>;
+
+ // Addition of signed 32-bit immediates.
+ def ALSIH : BinaryRIL<"alsih", 0xCCA, null_frag, GRH32, simm32>,
+ Requires<[FeatureHighWord]>;
+
+ // Addition of memory.
+ defm AL : BinaryRXPair<"al", 0x5E, 0xE35E, addc, GR32, load, 4>;
+ def ALGF : BinaryRXY<"algf", 0xE31A, addc, GR64, azextloadi32, 4>;
+ def ALG : BinaryRXY<"alg", 0xE30A, addc, GR64, load, 8>;
+
+ // Addition to memory.
+ def ALSI : BinarySIY<"alsi", 0xEB6E, null_frag, imm32sx8>;
+ def ALGSI : BinarySIY<"algsi", 0xEB7E, null_frag, imm64sx8>;
+}
+defm : ZXB<addc, GR64, ALGFR>;
+
+// Addition producing and using a carry.
+let Defs = [CC], Uses = [CC] in {
+ // Addition of a register.
+ def ALCR : BinaryRRE<"alcr", 0xB998, adde, GR32, GR32>;
+ def ALCGR : BinaryRRE<"alcgr", 0xB988, adde, GR64, GR64>;
+
+ // Addition of memory.
+ def ALC : BinaryRXY<"alc", 0xE398, adde, GR32, load, 4>;
+ def ALCG : BinaryRXY<"alcg", 0xE388, adde, GR64, load, 8>;
+}
+
+// Addition that does not modify the condition code.
+def ALSIHN : BinaryRIL<"alsihn", 0xCCB, null_frag, GRH32, simm32>,
+ Requires<[FeatureHighWord]>;
+
+//===----------------------------------------------------------------------===//
+// Subtraction
+//===----------------------------------------------------------------------===//
+
+// Plain subtraction. Although immediate forms exist, we use the
+// add-immediate instruction instead.
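+// For example, "x - 16" is expected to match the add patterns as
+// "ahi %rX, -16" rather than an immediate subtraction (%rX being
+// whichever register holds x; illustrative).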
+let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in {
+ // Subtraction of a register.
+ defm SR : BinaryRRAndK<"sr", 0x1B, 0xB9F9, sub, GR32, GR32>;
+ def SGFR : BinaryRRE<"sgfr", 0xB919, null_frag, GR64, GR32>;
+ defm SGR : BinaryRREAndK<"sgr", 0xB909, 0xB9E9, sub, GR64, GR64>;
+
+ // Subtraction from a high register.
+ def SHHHR : BinaryRRFa<"shhhr", 0xB9C9, null_frag, GRH32, GRH32, GRH32>,
+ Requires<[FeatureHighWord]>;
+ def SHHLR : BinaryRRFa<"shhlr", 0xB9D9, null_frag, GRH32, GRH32, GR32>,
+ Requires<[FeatureHighWord]>;
+
+ // Subtraction of memory.
+ defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, sub, GR32, asextloadi16, 2>;
+ defm S : BinaryRXPair<"s", 0x5B, 0xE35B, sub, GR32, load, 4>;
+ def SGF : BinaryRXY<"sgf", 0xE319, sub, GR64, asextloadi32, 4>;
+ def SG : BinaryRXY<"sg", 0xE309, sub, GR64, load, 8>;
+}
+defm : SXB<sub, GR64, SGFR>;
+
+// Subtraction producing a carry.
+let Defs = [CC] in {
+ // Subtraction of a register.
+ defm SLR : BinaryRRAndK<"slr", 0x1F, 0xB9FB, subc, GR32, GR32>;
+ def SLGFR : BinaryRRE<"slgfr", 0xB91B, null_frag, GR64, GR32>;
+ defm SLGR : BinaryRREAndK<"slgr", 0xB90B, 0xB9EB, subc, GR64, GR64>;
+
+ // Subtraction from a high register.
+ def SLHHHR : BinaryRRFa<"slhhhr", 0xB9CB, null_frag, GRH32, GRH32, GRH32>,
+ Requires<[FeatureHighWord]>;
+ def SLHHLR : BinaryRRFa<"slhhlr", 0xB9DB, null_frag, GRH32, GRH32, GR32>,
+ Requires<[FeatureHighWord]>;
+
+ // Subtraction of unsigned 32-bit immediates. These don't match
+ // subc because we prefer addc for constants.
+ def SLFI : BinaryRIL<"slfi", 0xC25, null_frag, GR32, uimm32>;
+ def SLGFI : BinaryRIL<"slgfi", 0xC24, null_frag, GR64, imm64zx32>;
+
+ // Subtraction of memory.
+ defm SL : BinaryRXPair<"sl", 0x5F, 0xE35F, subc, GR32, load, 4>;
+ def SLGF : BinaryRXY<"slgf", 0xE31B, subc, GR64, azextloadi32, 4>;
+ def SLG : BinaryRXY<"slg", 0xE30B, subc, GR64, load, 8>;
+}
+defm : ZXB<subc, GR64, SLGFR>;
+
+// Subtraction producing and using a carry.
+let Defs = [CC], Uses = [CC] in {
+ // Subtraction of a register.
+ def SLBR : BinaryRRE<"slbr", 0xB999, sube, GR32, GR32>;
+ def SLBGR : BinaryRRE<"slbgr", 0xB989, sube, GR64, GR64>;
+
+ // Subtraction of memory.
+ def SLB : BinaryRXY<"slb", 0xE399, sube, GR32, load, 4>;
+ def SLBG : BinaryRXY<"slbg", 0xE389, sube, GR64, load, 8>;
+}
+
+//===----------------------------------------------------------------------===//
+// AND
+//===----------------------------------------------------------------------===//
+
+let Defs = [CC] in {
+ // ANDs of a register.
+ let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+ defm NR : BinaryRRAndK<"nr", 0x14, 0xB9F4, and, GR32, GR32>;
+ defm NGR : BinaryRREAndK<"ngr", 0xB980, 0xB9E4, and, GR64, GR64>;
+ }
+
+ let isConvertibleToThreeAddress = 1 in {
+ // ANDs of a 16-bit immediate, leaving other bits unaffected.
+ // The CC result only reflects the 16-bit field, not the full register.
+ //
+ // NIxMux expands to NI[LH]x, depending on the choice of register.
+ def NILMux : BinaryRIPseudo<and, GRX32, imm32ll16c>,
+ Requires<[FeatureHighWord]>;
+ def NIHMux : BinaryRIPseudo<and, GRX32, imm32lh16c>,
+ Requires<[FeatureHighWord]>;
+ def NILL : BinaryRI<"nill", 0xA57, and, GR32, imm32ll16c>;
+ def NILH : BinaryRI<"nilh", 0xA56, and, GR32, imm32lh16c>;
+ def NIHL : BinaryRI<"nihl", 0xA55, and, GRH32, imm32ll16c>;
+ def NIHH : BinaryRI<"nihh", 0xA54, and, GRH32, imm32lh16c>;
+ def NILL64 : BinaryAliasRI<and, GR64, imm64ll16c>;
+ def NILH64 : BinaryAliasRI<and, GR64, imm64lh16c>;
+ def NIHL64 : BinaryAliasRI<and, GR64, imm64hl16c>;
+ def NIHH64 : BinaryAliasRI<and, GR64, imm64hh16c>;
+
+ // ANDs of a 32-bit immediate, leaving other bits unaffected.
+ // The CC result only reflects the 32-bit field, which means we can
+ // use it as a zero indicator for i32 operations but not otherwise.
+ let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+ // Expands to NILF or NIHF, depending on the choice of register.
+ def NIFMux : BinaryRIPseudo<and, GRX32, uimm32>,
+ Requires<[FeatureHighWord]>;
+ def NILF : BinaryRIL<"nilf", 0xC0B, and, GR32, uimm32>;
+ def NIHF : BinaryRIL<"nihf", 0xC0A, and, GRH32, uimm32>;
+ }
+ def NILF64 : BinaryAliasRIL<and, GR64, imm64lf32c>;
+ def NIHF64 : BinaryAliasRIL<and, GR64, imm64hf32c>;
+ }
+
+ // ANDs of memory.
+ let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+ defm N : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load, 4>;
+ def NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>;
+ }
+
+ // AND to memory
+ defm NI : BinarySIPair<"ni", 0x94, 0xEB54, null_frag, imm32zx8>;
+
+ // Block AND.
+ let mayLoad = 1, mayStore = 1 in
+ defm NC : MemorySS<"nc", 0xD4, z_nc, z_nc_loop>;
+}
+defm : RMWIByte<and, bdaddr12pair, NI>;
+defm : RMWIByte<and, bdaddr20pair, NIY>;
+
+//===----------------------------------------------------------------------===//
+// OR
+//===----------------------------------------------------------------------===//
+
+let Defs = [CC] in {
+ // ORs of a register.
+ let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+ defm OR : BinaryRRAndK<"or", 0x16, 0xB9F6, or, GR32, GR32>;
+ defm OGR : BinaryRREAndK<"ogr", 0xB981, 0xB9E6, or, GR64, GR64>;
+ }
+
+ // ORs of a 16-bit immediate, leaving other bits unaffected.
+ // The CC result only reflects the 16-bit field, not the full register.
+ //
+ // OIxMux expands to OI[LH]x, depending on the choice of register.
+ def OILMux : BinaryRIPseudo<or, GRX32, imm32ll16>,
+ Requires<[FeatureHighWord]>;
+ def OIHMux : BinaryRIPseudo<or, GRX32, imm32lh16>,
+ Requires<[FeatureHighWord]>;
+ def OILL : BinaryRI<"oill", 0xA5B, or, GR32, imm32ll16>;
+ def OILH : BinaryRI<"oilh", 0xA5A, or, GR32, imm32lh16>;
+ def OIHL : BinaryRI<"oihl", 0xA59, or, GRH32, imm32ll16>;
+ def OIHH : BinaryRI<"oihh", 0xA58, or, GRH32, imm32lh16>;
+ def OILL64 : BinaryAliasRI<or, GR64, imm64ll16>;
+ def OILH64 : BinaryAliasRI<or, GR64, imm64lh16>;
+ def OIHL64 : BinaryAliasRI<or, GR64, imm64hl16>;
+ def OIHH64 : BinaryAliasRI<or, GR64, imm64hh16>;
+
+ // ORs of a 32-bit immediate, leaving other bits unaffected.
+ // The CC result only reflects the 32-bit field, which means we can
+ // use it as a zero indicator for i32 operations but not otherwise.
+ let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+ // Expands to OILF or OIHF, depending on the choice of register.
+ def OIFMux : BinaryRIPseudo<or, GRX32, uimm32>,
+ Requires<[FeatureHighWord]>;
+ def OILF : BinaryRIL<"oilf", 0xC0D, or, GR32, uimm32>;
+ def OIHF : BinaryRIL<"oihf", 0xC0C, or, GRH32, uimm32>;
+ }
+ def OILF64 : BinaryAliasRIL<or, GR64, imm64lf32>;
+ def OIHF64 : BinaryAliasRIL<or, GR64, imm64hf32>;
+
+ // ORs of memory.
+ let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+ defm O : BinaryRXPair<"o", 0x56, 0xE356, or, GR32, load, 4>;
+ def OG : BinaryRXY<"og", 0xE381, or, GR64, load, 8>;
+ }
+
+ // OR to memory
+ defm OI : BinarySIPair<"oi", 0x96, 0xEB56, null_frag, imm32zx8>;
+
+ // Block OR.
+ let mayLoad = 1, mayStore = 1 in
+ defm OC : MemorySS<"oc", 0xD6, z_oc, z_oc_loop>;
+}
+defm : RMWIByte<or, bdaddr12pair, OI>;
+defm : RMWIByte<or, bdaddr20pair, OIY>;
+
+//===----------------------------------------------------------------------===//
+// XOR
+//===----------------------------------------------------------------------===//
+
+let Defs = [CC] in {
+ // XORs of a register.
+ let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+ defm XR : BinaryRRAndK<"xr", 0x17, 0xB9F7, xor, GR32, GR32>;
+ defm XGR : BinaryRREAndK<"xgr", 0xB982, 0xB9E7, xor, GR64, GR64>;
+ }
+
+ // XORs of a 32-bit immediate, leaving other bits unaffected.
+ // The CC result only reflects the 32-bit field, which means we can
+ // use it as a zero indicator for i32 operations but not otherwise.
+ let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+ // Expands to XILF or XIHF, depending on the choice of register.
+ def XIFMux : BinaryRIPseudo<xor, GRX32, uimm32>,
+ Requires<[FeatureHighWord]>;
+ def XILF : BinaryRIL<"xilf", 0xC07, xor, GR32, uimm32>;
+ def XIHF : BinaryRIL<"xihf", 0xC06, xor, GRH32, uimm32>;
+ }
+ def XILF64 : BinaryAliasRIL<xor, GR64, imm64lf32>;
+ def XIHF64 : BinaryAliasRIL<xor, GR64, imm64hf32>;
+
+ // XORs of memory.
+ let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+ defm X : BinaryRXPair<"x", 0x57, 0xE357, xor, GR32, load, 4>;
+ def XG : BinaryRXY<"xg", 0xE382, xor, GR64, load, 8>;
+ }
+
+ // XOR to memory
+ defm XI : BinarySIPair<"xi", 0x97, 0xEB57, null_frag, imm32zx8>;
+
+ // Block XOR.
+ let mayLoad = 1, mayStore = 1 in
+ defm XC : MemorySS<"xc", 0xD7, z_xc, z_xc_loop>;
+}
+defm : RMWIByte<xor, bdaddr12pair, XI>;
+defm : RMWIByte<xor, bdaddr20pair, XIY>;
+
+//===----------------------------------------------------------------------===//
+// Multiplication
+//===----------------------------------------------------------------------===//
+
+// Multiplication of a register.
+let isCommutable = 1 in {
+ def MSR : BinaryRRE<"msr", 0xB252, mul, GR32, GR32>;
+ def MSGR : BinaryRRE<"msgr", 0xB90C, mul, GR64, GR64>;
+}
+def MSGFR : BinaryRRE<"msgfr", 0xB91C, null_frag, GR64, GR32>;
+defm : SXB<mul, GR64, MSGFR>;
+
+// Multiplication of a signed 16-bit immediate.
+def MHI : BinaryRI<"mhi", 0xA7C, mul, GR32, imm32sx16>;
+def MGHI : BinaryRI<"mghi", 0xA7D, mul, GR64, imm64sx16>;
+
+// Multiplication of a signed 32-bit immediate.
+def MSFI : BinaryRIL<"msfi", 0xC21, mul, GR32, simm32>;
+def MSGFI : BinaryRIL<"msgfi", 0xC20, mul, GR64, imm64sx32>;
+
+// Multiplication of memory.
+defm MH : BinaryRXPair<"mh", 0x4C, 0xE37C, mul, GR32, asextloadi16, 2>;
+defm MS : BinaryRXPair<"ms", 0x71, 0xE351, mul, GR32, load, 4>;
+def MSGF : BinaryRXY<"msgf", 0xE31C, mul, GR64, asextloadi32, 4>;
+def MSG : BinaryRXY<"msg", 0xE30C, mul, GR64, load, 8>;
+
+// Multiplication of a register, producing two results.
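+// For example, "mlgr %r0, %r5" multiplies %r1 (the low half of the
+// %r0:%r1 pair) by %r5, leaving the 128-bit product with its high part
+// in %r0 and its low part in %r1.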
+def MR : BinaryRR <"mr", 0x1C, null_frag, GR128, GR32>;
+def MLR : BinaryRRE<"mlr", 0xB996, null_frag, GR128, GR32>;
+def MLGR : BinaryRRE<"mlgr", 0xB986, z_umul_lohi64, GR128, GR64>;
+
+// Multiplication of memory, producing two results.
+def M : BinaryRX <"m", 0x5C, null_frag, GR128, load, 4>;
+def MFY : BinaryRXY<"mfy", 0xE35C, null_frag, GR128, load, 4>;
+def ML : BinaryRXY<"ml", 0xE396, null_frag, GR128, load, 4>;
+def MLG : BinaryRXY<"mlg", 0xE386, z_umul_lohi64, GR128, load, 8>;
+
+//===----------------------------------------------------------------------===//
+// Division and remainder
+//===----------------------------------------------------------------------===//
+
+let hasSideEffects = 1 in { // Do not speculatively execute.
+ // Division and remainder, from registers.
+ def DR : BinaryRR <"dr", 0x1D, null_frag, GR128, GR32>;
+ def DSGFR : BinaryRRE<"dsgfr", 0xB91D, z_sdivrem32, GR128, GR32>;
+ def DSGR : BinaryRRE<"dsgr", 0xB90D, z_sdivrem64, GR128, GR64>;
+ def DLR : BinaryRRE<"dlr", 0xB997, z_udivrem32, GR128, GR32>;
+ def DLGR : BinaryRRE<"dlgr", 0xB987, z_udivrem64, GR128, GR64>;
+
+ // Division and remainder, from memory.
+ def D : BinaryRX <"d", 0x5D, null_frag, GR128, load, 4>;
+ def DSGF : BinaryRXY<"dsgf", 0xE31D, z_sdivrem32, GR128, load, 4>;
+ def DSG : BinaryRXY<"dsg", 0xE30D, z_sdivrem64, GR128, load, 8>;
+ def DL : BinaryRXY<"dl", 0xE397, z_udivrem32, GR128, load, 4>;
+ def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load, 8>;
+}
+
+//===----------------------------------------------------------------------===//
+// Shifts
+//===----------------------------------------------------------------------===//
+
+// Logical shift left.
+let hasSideEffects = 0 in {
+ defm SLL : BinaryRSAndK<"sll", 0x89, 0xEBDF, shl, GR32>;
+ def SLLG : BinaryRSY<"sllg", 0xEB0D, shl, GR64>;
+ def SLDL : BinaryRS<"sldl", 0x8D, null_frag, GR128>;
+}
+
+// Arithmetic shift left.
+let Defs = [CC] in {
+ defm SLA : BinaryRSAndK<"sla", 0x8B, 0xEBDD, null_frag, GR32>;
+ def SLAG : BinaryRSY<"slag", 0xEB0B, null_frag, GR64>;
+ def SLDA : BinaryRS<"slda", 0x8F, null_frag, GR128>;
+}
+
+// Logical shift right.
+let hasSideEffects = 0 in {
+ defm SRL : BinaryRSAndK<"srl", 0x88, 0xEBDE, srl, GR32>;
+ def SRLG : BinaryRSY<"srlg", 0xEB0C, srl, GR64>;
+ def SRDL : BinaryRS<"srdl", 0x8C, null_frag, GR128>;
+}
+
+// Arithmetic shift right.
+let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in {
+ defm SRA : BinaryRSAndK<"sra", 0x8A, 0xEBDC, sra, GR32>;
+ def SRAG : BinaryRSY<"srag", 0xEB0A, sra, GR64>;
+ def SRDA : BinaryRS<"srda", 0x8E, null_frag, GR128>;
+}
+
+// Rotate left.
+let hasSideEffects = 0 in {
+ def RLL : BinaryRSY<"rll", 0xEB1D, rotl, GR32>;
+ def RLLG : BinaryRSY<"rllg", 0xEB1C, rotl, GR64>;
+}
+
+// Rotate second operand left and insert selected bits into first operand.
+// These can act like 32-bit operands provided that the constant start and
+// end bits (operands 2 and 3) are in the range [32, 64).
+let Defs = [CC] in {
+ let isCodeGenOnly = 1 in
+ def RISBG32 : RotateSelectRIEf<"risbg", 0xEC55, GR32, GR32>;
+ let CCValues = 0xE, CompareZeroCCMask = 0xE in
+ def RISBG : RotateSelectRIEf<"risbg", 0xEC55, GR64, GR64>;
+}
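+
+// For illustration (an assumed example, not from the surrounding source):
+// "risbg %r1, %r2, 60, 191, 60" rotates %r2 left by 60 bits, inserts the
+// rotated bits 60-63 into %r1, and clears the remaining bits of %r1 (the
+// end-bit operand 191 = 128 + 63 sets the "zero" flag), so it computes
+// %r1 = (%r2 >> 4) & 0xf.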
+
+// On zEC12 we have a variant of RISBG that does not set CC.
+let Predicates = [FeatureMiscellaneousExtensions] in
+ def RISBGN : RotateSelectRIEf<"risbgn", 0xEC59, GR64, GR64>;
+
+// Forms of RISBG that only affect one word of the destination register.
+// They do not set CC.
+let Predicates = [FeatureHighWord] in {
+ def RISBMux : RotateSelectRIEfPseudo<GRX32, GRX32>;
+ def RISBLL : RotateSelectAliasRIEf<GR32, GR32>;
+ def RISBLH : RotateSelectAliasRIEf<GR32, GRH32>;
+ def RISBHL : RotateSelectAliasRIEf<GRH32, GR32>;
+ def RISBHH : RotateSelectAliasRIEf<GRH32, GRH32>;
+ def RISBLG : RotateSelectRIEf<"risblg", 0xEC51, GR32, GR64>;
+ def RISBHG : RotateSelectRIEf<"risbhg", 0xEC5D, GRH32, GR64>;
+}
+
+// Rotate second operand left and perform a logical operation with selected
+// bits of the first operand. The CC result only describes the selected
+// bits, so it isn't useful for a full comparison against zero.
+let Defs = [CC] in {
+ def RNSBG : RotateSelectRIEf<"rnsbg", 0xEC54, GR64, GR64>;
+ def ROSBG : RotateSelectRIEf<"rosbg", 0xEC56, GR64, GR64>;
+ def RXSBG : RotateSelectRIEf<"rxsbg", 0xEC57, GR64, GR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// Comparison
+//===----------------------------------------------------------------------===//
+
+// Signed comparisons. We put these before the unsigned comparisons because
+// some of the signed forms have COMPARE AND BRANCH equivalents whereas none
+// of the unsigned forms do.
+let Defs = [CC], CCValues = 0xE in {
+ // Comparison with a register.
+ def CR : CompareRR <"cr", 0x19, z_scmp, GR32, GR32>;
+ def CGFR : CompareRRE<"cgfr", 0xB930, null_frag, GR64, GR32>;
+ def CGR : CompareRRE<"cgr", 0xB920, z_scmp, GR64, GR64>;
+
+ // Comparison with a high register.
+ def CHHR : CompareRRE<"chhr", 0xB9CD, null_frag, GRH32, GRH32>,
+ Requires<[FeatureHighWord]>;
+ def CHLR : CompareRRE<"chlr", 0xB9DD, null_frag, GRH32, GR32>,
+ Requires<[FeatureHighWord]>;
+
+ // Comparison with a signed 16-bit immediate. CHIMux expands to CHI or CIH,
+ // depending on the choice of register.
+ def CHIMux : CompareRIPseudo<z_scmp, GRX32, imm32sx16>,
+ Requires<[FeatureHighWord]>;
+ def CHI : CompareRI<"chi", 0xA7E, z_scmp, GR32, imm32sx16>;
+ def CGHI : CompareRI<"cghi", 0xA7F, z_scmp, GR64, imm64sx16>;
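+
+  // For example (illustrative): after register allocation, a CHIMux of an
+  // operand against 5 becomes "chi %r2, 5" if the operand was assigned to
+  // the low half of a GPR, and "cih %r2, 5" if it was assigned to the
+  // high half.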
+
+ // Comparison with a signed 32-bit immediate. CFIMux expands to CFI or CIH,
+ // depending on the choice of register.
+ def CFIMux : CompareRIPseudo<z_scmp, GRX32, simm32>,
+ Requires<[FeatureHighWord]>;
+ def CFI : CompareRIL<"cfi", 0xC2D, z_scmp, GR32, simm32>;
+ def CIH : CompareRIL<"cih", 0xCCD, z_scmp, GRH32, simm32>,
+ Requires<[FeatureHighWord]>;
+ def CGFI : CompareRIL<"cgfi", 0xC2C, z_scmp, GR64, imm64sx32>;
+
+ // Comparison with memory.
+ defm CH : CompareRXPair<"ch", 0x49, 0xE379, z_scmp, GR32, asextloadi16, 2>;
+ def CMux : CompareRXYPseudo<z_scmp, GRX32, load, 4>,
+ Requires<[FeatureHighWord]>;
+ defm C : CompareRXPair<"c", 0x59, 0xE359, z_scmp, GR32, load, 4>;
+ def CHF : CompareRXY<"chf", 0xE3CD, z_scmp, GRH32, load, 4>,
+ Requires<[FeatureHighWord]>;
+ def CGH : CompareRXY<"cgh", 0xE334, z_scmp, GR64, asextloadi16, 2>;
+ def CGF : CompareRXY<"cgf", 0xE330, z_scmp, GR64, asextloadi32, 4>;
+ def CG : CompareRXY<"cg", 0xE320, z_scmp, GR64, load, 8>;
+ def CHRL : CompareRILPC<"chrl", 0xC65, z_scmp, GR32, aligned_asextloadi16>;
+ def CRL : CompareRILPC<"crl", 0xC6D, z_scmp, GR32, aligned_load>;
+ def CGHRL : CompareRILPC<"cghrl", 0xC64, z_scmp, GR64, aligned_asextloadi16>;
+ def CGFRL : CompareRILPC<"cgfrl", 0xC6C, z_scmp, GR64, aligned_asextloadi32>;
+ def CGRL : CompareRILPC<"cgrl", 0xC68, z_scmp, GR64, aligned_load>;
+
+ // Comparison between memory and a signed 16-bit immediate.
+ def CHHSI : CompareSIL<"chhsi", 0xE554, z_scmp, asextloadi16, imm32sx16>;
+ def CHSI : CompareSIL<"chsi", 0xE55C, z_scmp, load, imm32sx16>;
+ def CGHSI : CompareSIL<"cghsi", 0xE558, z_scmp, load, imm64sx16>;
+}
+defm : SXB<z_scmp, GR64, CGFR>;
+
+// Unsigned comparisons.
+let Defs = [CC], CCValues = 0xE, IsLogical = 1 in {
+ // Comparison with a register.
+ def CLR : CompareRR <"clr", 0x15, z_ucmp, GR32, GR32>;
+ def CLGFR : CompareRRE<"clgfr", 0xB931, null_frag, GR64, GR32>;
+ def CLGR : CompareRRE<"clgr", 0xB921, z_ucmp, GR64, GR64>;
+
+ // Comparison with a high register.
+ def CLHHR : CompareRRE<"clhhr", 0xB9CF, null_frag, GRH32, GRH32>,
+ Requires<[FeatureHighWord]>;
+ def CLHLR : CompareRRE<"clhlr", 0xB9DF, null_frag, GRH32, GR32>,
+ Requires<[FeatureHighWord]>;
+
+ // Comparison with an unsigned 32-bit immediate. CLFIMux expands to CLFI
+ // or CLIH, depending on the choice of register.
+ def CLFIMux : CompareRIPseudo<z_ucmp, GRX32, uimm32>,
+ Requires<[FeatureHighWord]>;
+ def CLFI : CompareRIL<"clfi", 0xC2F, z_ucmp, GR32, uimm32>;
+ def CLIH : CompareRIL<"clih", 0xCCF, z_ucmp, GRH32, uimm32>,
+ Requires<[FeatureHighWord]>;
+ def CLGFI : CompareRIL<"clgfi", 0xC2E, z_ucmp, GR64, imm64zx32>;
+
+ // Comparison with memory.
+ def CLMux : CompareRXYPseudo<z_ucmp, GRX32, load, 4>,
+ Requires<[FeatureHighWord]>;
+ defm CL : CompareRXPair<"cl", 0x55, 0xE355, z_ucmp, GR32, load, 4>;
+ def CLHF : CompareRXY<"clhf", 0xE3CF, z_ucmp, GRH32, load, 4>,
+ Requires<[FeatureHighWord]>;
+ def CLGF : CompareRXY<"clgf", 0xE331, z_ucmp, GR64, azextloadi32, 4>;
+ def CLG : CompareRXY<"clg", 0xE321, z_ucmp, GR64, load, 8>;
+ def CLHRL : CompareRILPC<"clhrl", 0xC67, z_ucmp, GR32,
+ aligned_azextloadi16>;
+ def CLRL : CompareRILPC<"clrl", 0xC6F, z_ucmp, GR32,
+ aligned_load>;
+ def CLGHRL : CompareRILPC<"clghrl", 0xC66, z_ucmp, GR64,
+ aligned_azextloadi16>;
+ def CLGFRL : CompareRILPC<"clgfrl", 0xC6E, z_ucmp, GR64,
+ aligned_azextloadi32>;
+ def CLGRL : CompareRILPC<"clgrl", 0xC6A, z_ucmp, GR64,
+ aligned_load>;
+
+ // Comparison between memory and an unsigned 8-bit immediate.
+ defm CLI : CompareSIPair<"cli", 0x95, 0xEB55, z_ucmp, azextloadi8, imm32zx8>;
+
+ // Comparison between memory and an unsigned 16-bit immediate.
+ def CLHHSI : CompareSIL<"clhhsi", 0xE555, z_ucmp, azextloadi16, imm32zx16>;
+ def CLFHSI : CompareSIL<"clfhsi", 0xE55D, z_ucmp, load, imm32zx16>;
+ def CLGHSI : CompareSIL<"clghsi", 0xE559, z_ucmp, load, imm64zx16>;
+}
+defm : ZXB<z_ucmp, GR64, CLGFR>;
+
+// Memory-to-memory comparison.
+let mayLoad = 1, Defs = [CC] in {
+ defm CLC : MemorySS<"clc", 0xD5, z_clc, z_clc_loop>;
+ def CLCL : SideEffectBinaryMemMemRR<"clcl", 0x0F, GR128, GR128>;
+ def CLCLE : SideEffectTernaryMemMemRS<"clcle", 0xA9, GR128, GR128>;
+ def CLCLU : SideEffectTernaryMemMemRSY<"clclu", 0xEB8F, GR128, GR128>;
+}
+
+// String comparison.
+let mayLoad = 1, Defs = [CC] in
+ defm CLST : StringRRE<"clst", 0xB25D, z_strcmp>;
+
+// Test under mask.
+let Defs = [CC] in {
+ // TMxMux expands to TM[LH]x, depending on the choice of register.
+ def TMLMux : CompareRIPseudo<z_tm_reg, GRX32, imm32ll16>,
+ Requires<[FeatureHighWord]>;
+ def TMHMux : CompareRIPseudo<z_tm_reg, GRX32, imm32lh16>,
+ Requires<[FeatureHighWord]>;
+ def TMLL : CompareRI<"tmll", 0xA71, z_tm_reg, GR32, imm32ll16>;
+ def TMLH : CompareRI<"tmlh", 0xA70, z_tm_reg, GR32, imm32lh16>;
+ def TMHL : CompareRI<"tmhl", 0xA73, z_tm_reg, GRH32, imm32ll16>;
+ def TMHH : CompareRI<"tmhh", 0xA72, z_tm_reg, GRH32, imm32lh16>;
+
+ def TMLL64 : CompareAliasRI<z_tm_reg, GR64, imm64ll16>;
+ def TMLH64 : CompareAliasRI<z_tm_reg, GR64, imm64lh16>;
+ def TMHL64 : CompareAliasRI<z_tm_reg, GR64, imm64hl16>;
+ def TMHH64 : CompareAliasRI<z_tm_reg, GR64, imm64hh16>;
+
+ defm TM : CompareSIPair<"tm", 0x91, 0xEB51, z_tm_mem, anyextloadi8, imm32zx8>;
+}
+
+def TML : InstAlias<"tml\t$R, $I", (TMLL GR32:$R, imm32ll16:$I), 0>;
+def TMH : InstAlias<"tmh\t$R, $I", (TMLH GR32:$R, imm32lh16:$I), 0>;
+
+// Compare logical characters under mask -- not (yet) used for codegen.
+let Defs = [CC] in {
+ defm CLM : CompareRSPair<"clm", 0xBD, 0xEB21, GR32, 0>;
+ def CLMH : CompareRSY<"clmh", 0xEB20, GRH32, 0>;
+}
+
+//===----------------------------------------------------------------------===//
+// Prefetch and execution hint
+//===----------------------------------------------------------------------===//
+
+def PFD : PrefetchRXY<"pfd", 0xE336, z_prefetch>;
+def PFDRL : PrefetchRILPC<"pfdrl", 0xC62, z_prefetch>;
+
+let Predicates = [FeatureExecutionHint] in {
+ // Branch Prediction Preload
+ def BPP : BranchPreloadSMI<"bpp", 0xC7>;
+ def BPRP : BranchPreloadMII<"bprp", 0xC5>;
+
+ // Next Instruction Access Intent
+ def NIAI : SideEffectBinaryIE<"niai", 0xB2FA, imm32zx4, imm32zx4>;
+}
+
+//===----------------------------------------------------------------------===//
+// Atomic operations
+//===----------------------------------------------------------------------===//
+
+// A serialization instruction that acts as a barrier for all memory
+// accesses; it expands to "bcr 14, 0".
+let hasSideEffects = 1 in
+def Serialize : Alias<2, (outs), (ins), []>;
+
+// A pseudo instruction that serves as a compiler barrier.
+let hasSideEffects = 1, hasNoSchedulingInfo = 1 in
+def MemBarrier : Pseudo<(outs), (ins), [(z_membarrier)]>;
+
+let Predicates = [FeatureInterlockedAccess1], Defs = [CC] in {
+ def LAA : LoadAndOpRSY<"laa", 0xEBF8, atomic_load_add_32, GR32>;
+ def LAAG : LoadAndOpRSY<"laag", 0xEBE8, atomic_load_add_64, GR64>;
+ def LAAL : LoadAndOpRSY<"laal", 0xEBFA, null_frag, GR32>;
+ def LAALG : LoadAndOpRSY<"laalg", 0xEBEA, null_frag, GR64>;
+ def LAN : LoadAndOpRSY<"lan", 0xEBF4, atomic_load_and_32, GR32>;
+ def LANG : LoadAndOpRSY<"lang", 0xEBE4, atomic_load_and_64, GR64>;
+ def LAO : LoadAndOpRSY<"lao", 0xEBF6, atomic_load_or_32, GR32>;
+ def LAOG : LoadAndOpRSY<"laog", 0xEBE6, atomic_load_or_64, GR64>;
+ def LAX : LoadAndOpRSY<"lax", 0xEBF7, atomic_load_xor_32, GR32>;
+ def LAXG : LoadAndOpRSY<"laxg", 0xEBE7, atomic_load_xor_64, GR64>;
+}
+
+def ATOMIC_SWAPW : AtomicLoadWBinaryReg<z_atomic_swapw>;
+def ATOMIC_SWAP_32 : AtomicLoadBinaryReg32<atomic_swap_32>;
+def ATOMIC_SWAP_64 : AtomicLoadBinaryReg64<atomic_swap_64>;
+
+def ATOMIC_LOADW_AR : AtomicLoadWBinaryReg<z_atomic_loadw_add>;
+def ATOMIC_LOADW_AFI : AtomicLoadWBinaryImm<z_atomic_loadw_add, simm32>;
+let Predicates = [FeatureNoInterlockedAccess1] in {
+ def ATOMIC_LOAD_AR : AtomicLoadBinaryReg32<atomic_load_add_32>;
+ def ATOMIC_LOAD_AHI : AtomicLoadBinaryImm32<atomic_load_add_32, imm32sx16>;
+ def ATOMIC_LOAD_AFI : AtomicLoadBinaryImm32<atomic_load_add_32, simm32>;
+ def ATOMIC_LOAD_AGR : AtomicLoadBinaryReg64<atomic_load_add_64>;
+ def ATOMIC_LOAD_AGHI : AtomicLoadBinaryImm64<atomic_load_add_64, imm64sx16>;
+ def ATOMIC_LOAD_AGFI : AtomicLoadBinaryImm64<atomic_load_add_64, imm64sx32>;
+}
+
+def ATOMIC_LOADW_SR : AtomicLoadWBinaryReg<z_atomic_loadw_sub>;
+def ATOMIC_LOAD_SR : AtomicLoadBinaryReg32<atomic_load_sub_32>;
+def ATOMIC_LOAD_SGR : AtomicLoadBinaryReg64<atomic_load_sub_64>;
+
+def ATOMIC_LOADW_NR : AtomicLoadWBinaryReg<z_atomic_loadw_and>;
+def ATOMIC_LOADW_NILH : AtomicLoadWBinaryImm<z_atomic_loadw_and, imm32lh16c>;
+let Predicates = [FeatureNoInterlockedAccess1] in {
+ def ATOMIC_LOAD_NR : AtomicLoadBinaryReg32<atomic_load_and_32>;
+ def ATOMIC_LOAD_NILL : AtomicLoadBinaryImm32<atomic_load_and_32,
+ imm32ll16c>;
+ def ATOMIC_LOAD_NILH : AtomicLoadBinaryImm32<atomic_load_and_32,
+ imm32lh16c>;
+ def ATOMIC_LOAD_NILF : AtomicLoadBinaryImm32<atomic_load_and_32, uimm32>;
+ def ATOMIC_LOAD_NGR : AtomicLoadBinaryReg64<atomic_load_and_64>;
+ def ATOMIC_LOAD_NILL64 : AtomicLoadBinaryImm64<atomic_load_and_64,
+ imm64ll16c>;
+ def ATOMIC_LOAD_NILH64 : AtomicLoadBinaryImm64<atomic_load_and_64,
+ imm64lh16c>;
+ def ATOMIC_LOAD_NIHL64 : AtomicLoadBinaryImm64<atomic_load_and_64,
+ imm64hl16c>;
+ def ATOMIC_LOAD_NIHH64 : AtomicLoadBinaryImm64<atomic_load_and_64,
+ imm64hh16c>;
+ def ATOMIC_LOAD_NILF64 : AtomicLoadBinaryImm64<atomic_load_and_64,
+ imm64lf32c>;
+ def ATOMIC_LOAD_NIHF64 : AtomicLoadBinaryImm64<atomic_load_and_64,
+ imm64hf32c>;
+}
+
+def ATOMIC_LOADW_OR : AtomicLoadWBinaryReg<z_atomic_loadw_or>;
+def ATOMIC_LOADW_OILH : AtomicLoadWBinaryImm<z_atomic_loadw_or, imm32lh16>;
+let Predicates = [FeatureNoInterlockedAccess1] in {
+ def ATOMIC_LOAD_OR : AtomicLoadBinaryReg32<atomic_load_or_32>;
+ def ATOMIC_LOAD_OILL : AtomicLoadBinaryImm32<atomic_load_or_32, imm32ll16>;
+ def ATOMIC_LOAD_OILH : AtomicLoadBinaryImm32<atomic_load_or_32, imm32lh16>;
+ def ATOMIC_LOAD_OILF : AtomicLoadBinaryImm32<atomic_load_or_32, uimm32>;
+ def ATOMIC_LOAD_OGR : AtomicLoadBinaryReg64<atomic_load_or_64>;
+ def ATOMIC_LOAD_OILL64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64ll16>;
+ def ATOMIC_LOAD_OILH64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64lh16>;
+ def ATOMIC_LOAD_OIHL64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hl16>;
+ def ATOMIC_LOAD_OIHH64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hh16>;
+ def ATOMIC_LOAD_OILF64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64lf32>;
+ def ATOMIC_LOAD_OIHF64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hf32>;
+}
+
+def ATOMIC_LOADW_XR : AtomicLoadWBinaryReg<z_atomic_loadw_xor>;
+def ATOMIC_LOADW_XILF : AtomicLoadWBinaryImm<z_atomic_loadw_xor, uimm32>;
+let Predicates = [FeatureNoInterlockedAccess1] in {
+ def ATOMIC_LOAD_XR : AtomicLoadBinaryReg32<atomic_load_xor_32>;
+ def ATOMIC_LOAD_XILF : AtomicLoadBinaryImm32<atomic_load_xor_32, uimm32>;
+ def ATOMIC_LOAD_XGR : AtomicLoadBinaryReg64<atomic_load_xor_64>;
+ def ATOMIC_LOAD_XILF64 : AtomicLoadBinaryImm64<atomic_load_xor_64, imm64lf32>;
+ def ATOMIC_LOAD_XIHF64 : AtomicLoadBinaryImm64<atomic_load_xor_64, imm64hf32>;
+}
+
+def ATOMIC_LOADW_NRi : AtomicLoadWBinaryReg<z_atomic_loadw_nand>;
+def ATOMIC_LOADW_NILHi : AtomicLoadWBinaryImm<z_atomic_loadw_nand,
+ imm32lh16c>;
+def ATOMIC_LOAD_NRi : AtomicLoadBinaryReg32<atomic_load_nand_32>;
+def ATOMIC_LOAD_NILLi : AtomicLoadBinaryImm32<atomic_load_nand_32,
+ imm32ll16c>;
+def ATOMIC_LOAD_NILHi : AtomicLoadBinaryImm32<atomic_load_nand_32,
+ imm32lh16c>;
+def ATOMIC_LOAD_NILFi : AtomicLoadBinaryImm32<atomic_load_nand_32, uimm32>;
+def ATOMIC_LOAD_NGRi : AtomicLoadBinaryReg64<atomic_load_nand_64>;
+def ATOMIC_LOAD_NILL64i : AtomicLoadBinaryImm64<atomic_load_nand_64,
+ imm64ll16c>;
+def ATOMIC_LOAD_NILH64i : AtomicLoadBinaryImm64<atomic_load_nand_64,
+ imm64lh16c>;
+def ATOMIC_LOAD_NIHL64i : AtomicLoadBinaryImm64<atomic_load_nand_64,
+ imm64hl16c>;
+def ATOMIC_LOAD_NIHH64i : AtomicLoadBinaryImm64<atomic_load_nand_64,
+ imm64hh16c>;
+def ATOMIC_LOAD_NILF64i : AtomicLoadBinaryImm64<atomic_load_nand_64,
+ imm64lf32c>;
+def ATOMIC_LOAD_NIHF64i : AtomicLoadBinaryImm64<atomic_load_nand_64,
+ imm64hf32c>;
+
+def ATOMIC_LOADW_MIN : AtomicLoadWBinaryReg<z_atomic_loadw_min>;
+def ATOMIC_LOAD_MIN_32 : AtomicLoadBinaryReg32<atomic_load_min_32>;
+def ATOMIC_LOAD_MIN_64 : AtomicLoadBinaryReg64<atomic_load_min_64>;
+
+def ATOMIC_LOADW_MAX : AtomicLoadWBinaryReg<z_atomic_loadw_max>;
+def ATOMIC_LOAD_MAX_32 : AtomicLoadBinaryReg32<atomic_load_max_32>;
+def ATOMIC_LOAD_MAX_64 : AtomicLoadBinaryReg64<atomic_load_max_64>;
+
+def ATOMIC_LOADW_UMIN : AtomicLoadWBinaryReg<z_atomic_loadw_umin>;
+def ATOMIC_LOAD_UMIN_32 : AtomicLoadBinaryReg32<atomic_load_umin_32>;
+def ATOMIC_LOAD_UMIN_64 : AtomicLoadBinaryReg64<atomic_load_umin_64>;
+
+def ATOMIC_LOADW_UMAX : AtomicLoadWBinaryReg<z_atomic_loadw_umax>;
+def ATOMIC_LOAD_UMAX_32 : AtomicLoadBinaryReg32<atomic_load_umax_32>;
+def ATOMIC_LOAD_UMAX_64 : AtomicLoadBinaryReg64<atomic_load_umax_64>;
+
+def ATOMIC_CMP_SWAPW
+ : Pseudo<(outs GR32:$dst), (ins bdaddr20only:$addr, GR32:$cmp, GR32:$swap,
+ ADDR32:$bitshift, ADDR32:$negbitshift,
+ uimm32:$bitsize),
+ [(set GR32:$dst,
+ (z_atomic_cmp_swapw bdaddr20only:$addr, GR32:$cmp, GR32:$swap,
+ ADDR32:$bitshift, ADDR32:$negbitshift,
+ uimm32:$bitsize))]> {
+ let Defs = [CC];
+ let mayLoad = 1;
+ let mayStore = 1;
+ let usesCustomInserter = 1;
+ let hasNoSchedulingInfo = 1;
+}
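+
+// A rough sketch of the custom insertion (not the literal expansion):
+// load the aligned word containing the field, then loop: rotate the word
+// left by $bitshift so the field sits at a known position, compare it
+// with $cmp, insert $swap if they match, rotate back by $negbitshift,
+// and commit with CS, retrying if CS observes an intervening store.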
+
+// Test and set.
+let mayLoad = 1, Defs = [CC] in
+ def TS : StoreInherentS<"ts", 0x9300, null_frag, 1>;
+
+// Compare and swap.
+let Defs = [CC] in {
+ defm CS : CmpSwapRSPair<"cs", 0xBA, 0xEB14, atomic_cmp_swap_32, GR32>;
+ def CSG : CmpSwapRSY<"csg", 0xEB30, atomic_cmp_swap_64, GR64>;
+}
+
+// Compare double and swap.
+let Defs = [CC] in {
+ defm CDS : CmpSwapRSPair<"cds", 0xBB, 0xEB31, null_frag, GR128>;
+ def CDSG : CmpSwapRSY<"cdsg", 0xEB3E, null_frag, GR128>;
+}
+
+// Compare and swap and store.
+let Uses = [R0L, R1D], Defs = [CC], mayStore = 1, mayLoad = 1 in
+ def CSST : SideEffectTernarySSF<"csst", 0xC82, GR64>;
+
+// Perform locked operation.
+let Uses = [R0L, R1D], Defs = [CC], mayStore = 1, mayLoad = 1 in
+ def PLO : SideEffectQuaternarySSe<"plo", 0xEE, GR64>;
+
+// Load/store pair from/to quadword.
+def LPQ : UnaryRXY<"lpq", 0xE38F, null_frag, GR128, 16>;
+def STPQ : StoreRXY<"stpq", 0xE38E, null_frag, GR128, 16>;
+
+// Load pair disjoint.
+let Predicates = [FeatureInterlockedAccess1], Defs = [CC] in {
+ def LPD : BinarySSF<"lpd", 0xC84, GR128>;
+ def LPDG : BinarySSF<"lpdg", 0xC85, GR128>;
+}
+
+//===----------------------------------------------------------------------===//
+// Translate and convert
+//===----------------------------------------------------------------------===//
+
+let mayLoad = 1, mayStore = 1 in
+ def TR : SideEffectBinarySSa<"tr", 0xDC>;
+
+let mayLoad = 1, Defs = [CC, R0L, R1D] in {
+ def TRT : SideEffectBinarySSa<"trt", 0xDD>;
+ def TRTR : SideEffectBinarySSa<"trtr", 0xD0>;
+}
+
+let mayLoad = 1, mayStore = 1, Uses = [R0L] in
+ def TRE : SideEffectBinaryMemMemRRE<"tre", 0xB2A5, GR128, GR64>;
+
+let mayLoad = 1, Uses = [R1D], Defs = [CC] in {
+ defm TRTE : BinaryMemRRFcOpt<"trte", 0xB9BF, GR128, GR64>;
+ defm TRTRE : BinaryMemRRFcOpt<"trtre", 0xB9BD, GR128, GR64>;
+}
+
+let mayLoad = 1, mayStore = 1, Uses = [R0L, R1D], Defs = [CC] in {
+ defm TROO : SideEffectTernaryMemMemRRFcOpt<"troo", 0xB993, GR128, GR64>;
+ defm TROT : SideEffectTernaryMemMemRRFcOpt<"trot", 0xB992, GR128, GR64>;
+ defm TRTO : SideEffectTernaryMemMemRRFcOpt<"trto", 0xB991, GR128, GR64>;
+ defm TRTT : SideEffectTernaryMemMemRRFcOpt<"trtt", 0xB990, GR128, GR64>;
+}
+
+let mayLoad = 1, mayStore = 1, Defs = [CC] in {
+ defm CU12 : SideEffectTernaryMemMemRRFcOpt<"cu12", 0xB2A7, GR128, GR128>;
+ defm CU14 : SideEffectTernaryMemMemRRFcOpt<"cu14", 0xB9B0, GR128, GR128>;
+ defm CU21 : SideEffectTernaryMemMemRRFcOpt<"cu21", 0xB2A6, GR128, GR128>;
+ defm CU24 : SideEffectTernaryMemMemRRFcOpt<"cu24", 0xB9B1, GR128, GR128>;
+ def CU41 : SideEffectBinaryMemMemRRE<"cu41", 0xB9B2, GR128, GR128>;
+ def CU42 : SideEffectBinaryMemMemRRE<"cu42", 0xB9B3, GR128, GR128>;
+
+ let isAsmParserOnly = 1 in {
+ defm CUUTF : SideEffectTernaryMemMemRRFcOpt<"cuutf", 0xB2A6, GR128, GR128>;
+ defm CUTFU : SideEffectTernaryMemMemRRFcOpt<"cutfu", 0xB2A7, GR128, GR128>;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Message-security assist
+//===----------------------------------------------------------------------===//
+
+let mayLoad = 1, mayStore = 1, Uses = [R0L, R1D], Defs = [CC] in {
+ def KM : SideEffectBinaryMemMemRRE<"km", 0xB92E, GR128, GR128>;
+ def KMC : SideEffectBinaryMemMemRRE<"kmc", 0xB92F, GR128, GR128>;
+
+ def KIMD : SideEffectBinaryMemRRE<"kimd", 0xB93E, GR64, GR128>;
+ def KLMD : SideEffectBinaryMemRRE<"klmd", 0xB93F, GR64, GR128>;
+ def KMAC : SideEffectBinaryMemRRE<"kmac", 0xB91E, GR64, GR128>;
+
+ let Predicates = [FeatureMessageSecurityAssist4] in {
+ def KMF : SideEffectBinaryMemMemRRE<"kmf", 0xB92A, GR128, GR128>;
+ def KMO : SideEffectBinaryMemMemRRE<"kmo", 0xB92B, GR128, GR128>;
+ def KMCTR : SideEffectTernaryMemMemMemRRFb<"kmctr", 0xB92D,
+ GR128, GR128, GR128>;
+ def PCC : SideEffectInherentRRE<"pcc", 0xB92C>;
+ }
+ let Predicates = [FeatureMessageSecurityAssist5] in
+ def PPNO : SideEffectBinaryMemMemRRE<"ppno", 0xB93C, GR128, GR128>;
+}
+
+//===----------------------------------------------------------------------===//
+// Decimal arithmetic
+//===----------------------------------------------------------------------===//
+
+defm CVB : BinaryRXPair<"cvb", 0x4F, 0xE306, null_frag, GR32, load, 4>;
+def CVBG : BinaryRXY<"cvbg", 0xE30E, null_frag, GR64, load, 8>;
+
+defm CVD : StoreRXPair<"cvd", 0x4E, 0xE326, null_frag, GR32, 4>;
+def CVDG : StoreRXY<"cvdg", 0xE32E, null_frag, GR64, 8>;
+
+let mayLoad = 1, mayStore = 1 in {
+ def MVN : SideEffectBinarySSa<"mvn", 0xD1>;
+ def MVZ : SideEffectBinarySSa<"mvz", 0xD3>;
+ def MVO : SideEffectBinarySSb<"mvo", 0xF1>;
+
+ def PACK : SideEffectBinarySSb<"pack", 0xF2>;
+ def PKA : SideEffectBinarySSf<"pka", 0xE9>;
+ def PKU : SideEffectBinarySSf<"pku", 0xE1>;
+ def UNPK : SideEffectBinarySSb<"unpk", 0xF3>;
+ let Defs = [CC] in {
+ def UNPKA : SideEffectBinarySSa<"unpka", 0xEA>;
+ def UNPKU : SideEffectBinarySSa<"unpku", 0xE2>;
+ }
+}
+
+let mayLoad = 1, mayStore = 1 in {
+ let Defs = [CC] in {
+ def AP : SideEffectBinarySSb<"ap", 0xFA>;
+ def SP : SideEffectBinarySSb<"sp", 0xFB>;
+ def ZAP : SideEffectBinarySSb<"zap", 0xF8>;
+ def SRP : SideEffectTernarySSc<"srp", 0xF0>;
+ }
+ def MP : SideEffectBinarySSb<"mp", 0xFC>;
+ def DP : SideEffectBinarySSb<"dp", 0xFD>;
+ let Defs = [CC] in {
+ def ED : SideEffectBinarySSa<"ed", 0xDE>;
+ def EDMK : SideEffectBinarySSa<"edmk", 0xDF>;
+ }
+}
+
+let Defs = [CC] in {
+ def CP : CompareSSb<"cp", 0xF9>;
+ def TP : TestRSL<"tp", 0xEBC0>;
+}
+
+//===----------------------------------------------------------------------===//
+// Access registers
+//===----------------------------------------------------------------------===//
+
+// Read a 32-bit access register into a GR32. As with all GR32 operations,
+// the upper 32 bits of the enclosing GR64 remain unchanged, which is useful
+// when a 64-bit address is stored in a pair of access registers.
+def EAR : UnaryRRE<"ear", 0xB24F, null_frag, GR32, AR32>;
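+
+// For example, a 64-bit address held in the pair %a0 (high) / %a1 (low)
+// can be read with the usual idiom (illustrative):
+//   ear  %r1, %a0
+//   sllg %r1, %r1, 32
+//   ear  %r1, %a1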
+
+// Set access register.
+def SAR : UnaryRRE<"sar", 0xB24E, null_frag, AR32, GR32>;
+
+// Copy access register.
+def CPYA : UnaryRRE<"cpya", 0xB24D, null_frag, AR32, AR32>;
+
+// Load address extended.
+defm LAE : LoadAddressRXPair<"lae", 0x51, 0xE375, null_frag>;
+
+// Load access multiple.
+defm LAM : LoadMultipleRSPair<"lam", 0x9A, 0xEB9A, AR32>;
+
+// Store access multiple.
+defm STAM : StoreMultipleRSPair<"stam", 0x9B, 0xEB9B, AR32>;
+
+//===----------------------------------------------------------------------===//
+// Program mask and addressing mode
+//===----------------------------------------------------------------------===//
+
+// Extract CC and program mask into a register. CC ends up in bits 29 and 28.
+let Uses = [CC] in
+ def IPM : InherentRRE<"ipm", 0xB222, GR32, z_ipm>;
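+
+// For example (illustrative): "ipm %r1" followed by "srl %r1, 28" leaves
+// just the condition code (0-3) in %r1, shifting out the program mask
+// below it.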
+
+// Set CC and program mask from a register.
+let hasSideEffects = 1, Defs = [CC] in
+ def SPM : SideEffectUnaryRR<"spm", 0x04, GR32>;
+
+// Branch and link - like BAS, but also extracts CC and program mask.
+let isCall = 1, Uses = [CC], Defs = [CC] in {
+ def BAL : CallRX<"bal", 0x45>;
+ def BALR : CallRR<"balr", 0x05>;
+}
+
+// Test addressing mode.
+let Defs = [CC] in
+ def TAM : SideEffectInherentE<"tam", 0x010B>;
+
+// Set addressing mode.
+let hasSideEffects = 1 in {
+ def SAM24 : SideEffectInherentE<"sam24", 0x010C>;
+ def SAM31 : SideEffectInherentE<"sam31", 0x010D>;
+ def SAM64 : SideEffectInherentE<"sam64", 0x010E>;
+}
+
+// Branch and set mode. Not really a call, but, like a call, it sets an
+// output register.
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in
+ def BSM : CallRR<"bsm", 0x0B>;
+
+// Branch and save and set mode.
+let isCall = 1, Defs = [CC] in
+ def BASSM : CallRR<"bassm", 0x0C>;
+
+//===----------------------------------------------------------------------===//
+// Transactional execution
+//===----------------------------------------------------------------------===//
+
+let hasSideEffects = 1, Predicates = [FeatureTransactionalExecution] in {
+ // Transaction Begin
+ let mayStore = 1, usesCustomInserter = 1, Defs = [CC] in {
+ def TBEGIN : SideEffectBinarySIL<"tbegin", 0xE560, z_tbegin, imm32zx16>;
+ def TBEGIN_nofloat : SideEffectBinarySILPseudo<z_tbegin_nofloat, imm32zx16>;
+
+ def TBEGINC : SideEffectBinarySIL<"tbeginc", 0xE561,
+ int_s390_tbeginc, imm32zx16>;
+ }
+
+ // Transaction End
+ let Defs = [CC] in
+ def TEND : SideEffectInherentS<"tend", 0xB2F8, z_tend>;
+
+ // Transaction Abort
+ let isTerminator = 1, isBarrier = 1 in
+ def TABORT : SideEffectAddressS<"tabort", 0xB2FC, int_s390_tabort>;
+
+ // Nontransactional Store
+ def NTSTG : StoreRXY<"ntstg", 0xE325, int_s390_ntstg, GR64, 8>;
+
+ // Extract Transaction Nesting Depth
+ def ETND : InherentRRE<"etnd", 0xB2EC, GR32, int_s390_etnd>;
+}
+
+//===----------------------------------------------------------------------===//
+// Processor assist
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureProcessorAssist] in {
+ let hasSideEffects = 1 in
+ def PPA : SideEffectTernaryRRFc<"ppa", 0xB2E8, GR64, GR64, imm32zx4>;
+ def : Pat<(int_s390_ppa_txassist GR32:$src),
+ (PPA (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32),
+ 0, 1)>;
+}
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//===----------------------------------------------------------------------===//
+
+// Find leftmost one, AKA count leading zeros. The instruction actually
+// returns a pair of GR64s, the first giving the number of leading zeros
+// and the second giving a copy of the source with the leftmost one bit
+// cleared. We only use the first result here.
+let Defs = [CC] in
+ def FLOGR : UnaryRRE<"flogr", 0xB983, null_frag, GR128, GR64>;
+def : Pat<(ctlz GR64:$src),
+ (EXTRACT_SUBREG (FLOGR GR64:$src), subreg_h64)>;
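+
+// For example (illustrative): FLOGR on the value 0x100 sets the even
+// register of the result pair to 55 (the leading-zero count) and the odd
+// register to 0 (the source with its leftmost one bit cleared).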
+
+// Population count. Counts bits set per byte.
+let Predicates = [FeaturePopulationCount], Defs = [CC] in
+ def POPCNT : UnaryRRE<"popcnt", 0xB9E1, z_popcnt, GR64, GR64>;
+
+// Use subregs to populate the "don't care" bits in a 32-bit to 64-bit anyext.
+def : Pat<(i64 (anyext GR32:$src)),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32)>;
+
+// Extend GR32s and GR64s to GR128s.
+let usesCustomInserter = 1 in {
+ def AEXT128_64 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>;
+ def ZEXT128_32 : Pseudo<(outs GR128:$dst), (ins GR32:$src), []>;
+ def ZEXT128_64 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>;
+}
+
+// Search a block of memory for a character.
+let mayLoad = 1, Defs = [CC] in
+ defm SRST : StringRRE<"srst", 0xB25E, z_search_string>;
+let mayLoad = 1, Defs = [CC], Uses = [R0L] in
+ def SRSTU : SideEffectBinaryMemMemRRE<"srstu", 0xB9BE, GR64, GR64>;
+
+// Compare until substring equal.
+let mayLoad = 1, Defs = [CC], Uses = [R0L, R1L] in
+ def CUSE : SideEffectBinaryMemMemRRE<"cuse", 0xB257, GR128, GR128>;
+
+// Compare and form codeword.
+let mayLoad = 1, Defs = [CC, R1D, R2D, R3D], Uses = [R1D, R2D, R3D] in
+ def CFC : SideEffectAddressS<"cfc", 0xB21A, null_frag>;
+
+// Update tree.
+let mayLoad = 1, mayStore = 1, Defs = [CC, R0D, R1D, R2D, R3D, R5D],
+ Uses = [R0D, R1D, R2D, R3D, R4D, R5D] in
+ def UPT : SideEffectInherentE<"upt", 0x0102>;
+
+// Checksum.
+let mayLoad = 1, Defs = [CC] in
+ def CKSM : SideEffectBinaryMemMemRRE<"cksm", 0xB241, GR64, GR128>;
+
+// Compression call.
+let mayLoad = 1, mayStore = 1, Defs = [CC, R1D], Uses = [R0L, R1D] in
+ def CMPSC : SideEffectBinaryMemMemRRE<"cmpsc", 0xB263, GR128, GR128>;
+
+// Execute.
+let hasSideEffects = 1 in {
+ def EX : SideEffectBinaryRX<"ex", 0x44, GR64>;
+ def EXRL : SideEffectBinaryRILPC<"exrl", 0xC60, GR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// .insn directive instructions
+//===----------------------------------------------------------------------===//
+
+let isCodeGenOnly = 1 in {
+ def InsnE : DirectiveInsnE<(outs), (ins imm64zx16:$enc), ".insn e,$enc", []>;
+ def InsnRI : DirectiveInsnRI<(outs), (ins imm64zx32:$enc, AnyReg:$R1,
+ imm32sx16:$I2),
+ ".insn ri,$enc,$R1,$I2", []>;
+ def InsnRIE : DirectiveInsnRIE<(outs), (ins imm64zx48:$enc, AnyReg:$R1,
+ AnyReg:$R3, brtarget16:$I2),
+ ".insn rie,$enc,$R1,$R3,$I2", []>;
+ def InsnRIL : DirectiveInsnRIL<(outs), (ins imm64zx48:$enc, AnyReg:$R1,
+ brtarget32:$I2),
+ ".insn ril,$enc,$R1,$I2", []>;
+ def InsnRILU : DirectiveInsnRIL<(outs), (ins imm64zx48:$enc, AnyReg:$R1,
+ uimm32:$I2),
+ ".insn rilu,$enc,$R1,$I2", []>;
+ def InsnRIS : DirectiveInsnRIS<(outs),
+ (ins imm64zx48:$enc, AnyReg:$R1,
+ imm32sx8:$I2, imm32zx4:$M3,
+ bdaddr12only:$BD4),
+ ".insn ris,$enc,$R1,$I2,$M3,$BD4", []>;
+ def InsnRR : DirectiveInsnRR<(outs),
+ (ins imm64zx16:$enc, AnyReg:$R1, AnyReg:$R2),
+ ".insn rr,$enc,$R1,$R2", []>;
+ def InsnRRE : DirectiveInsnRRE<(outs), (ins imm64zx32:$enc,
+ AnyReg:$R1, AnyReg:$R2),
+ ".insn rre,$enc,$R1,$R2", []>;
+ def InsnRRF : DirectiveInsnRRF<(outs),
+ (ins imm64zx32:$enc, AnyReg:$R1, AnyReg:$R2,
+ AnyReg:$R3, imm32zx4:$M4),
+ ".insn rrf,$enc,$R1,$R2,$R3,$M4", []>;
+ def InsnRRS : DirectiveInsnRRS<(outs),
+ (ins imm64zx48:$enc, AnyReg:$R1,
+ AnyReg:$R2, imm32zx4:$M3,
+ bdaddr12only:$BD4),
+ ".insn rrs,$enc,$R1,$R2,$M3,$BD4", []>;
+ def InsnRS : DirectiveInsnRS<(outs),
+ (ins imm64zx32:$enc, AnyReg:$R1,
+ AnyReg:$R3, bdaddr12only:$BD2),
+ ".insn rs,$enc,$R1,$R3,$BD2", []>;
+ def InsnRSE : DirectiveInsnRSE<(outs),
+ (ins imm64zx48:$enc, AnyReg:$R1,
+ AnyReg:$R3, bdaddr12only:$BD2),
+ ".insn rse,$enc,$R1,$R3,$BD2", []>;
+ def InsnRSI : DirectiveInsnRSI<(outs),
+ (ins imm64zx48:$enc, AnyReg:$R1,
+ AnyReg:$R3, brtarget16:$RI2),
+ ".insn rsi,$enc,$R1,$R3,$RI2", []>;
+ def InsnRSY : DirectiveInsnRSY<(outs),
+ (ins imm64zx48:$enc, AnyReg:$R1,
+ AnyReg:$R3, bdaddr20only:$BD2),
+ ".insn rsy,$enc,$R1,$R3,$BD2", []>;
+ def InsnRX : DirectiveInsnRX<(outs), (ins imm64zx32:$enc, AnyReg:$R1,
+ bdxaddr12only:$XBD2),
+ ".insn rx,$enc,$R1,$XBD2", []>;
+ def InsnRXE : DirectiveInsnRXE<(outs), (ins imm64zx48:$enc, AnyReg:$R1,
+ bdxaddr12only:$XBD2),
+ ".insn rxe,$enc,$R1,$XBD2", []>;
+ def InsnRXF : DirectiveInsnRXF<(outs),
+ (ins imm64zx48:$enc, AnyReg:$R1,
+ AnyReg:$R3, bdxaddr12only:$XBD2),
+ ".insn rxf,$enc,$R1,$R3,$XBD2", []>;
+ def InsnRXY : DirectiveInsnRXY<(outs), (ins imm64zx48:$enc, AnyReg:$R1,
+ bdxaddr20only:$XBD2),
+ ".insn rxy,$enc,$R1,$XBD2", []>;
+ def InsnS : DirectiveInsnS<(outs),
+ (ins imm64zx32:$enc, bdaddr12only:$BD2),
+ ".insn s,$enc,$BD2", []>;
+ def InsnSI : DirectiveInsnSI<(outs),
+ (ins imm64zx32:$enc, bdaddr12only:$BD1,
+ imm32sx8:$I2),
+ ".insn si,$enc,$BD1,$I2", []>;
+ def InsnSIY : DirectiveInsnSIY<(outs),
+ (ins imm64zx48:$enc,
+ bdaddr20only:$BD1, imm32zx8:$I2),
+ ".insn siy,$enc,$BD1,$I2", []>;
+ def InsnSIL : DirectiveInsnSIL<(outs),
+ (ins imm64zx48:$enc, bdaddr12only:$BD1,
+ imm32zx16:$I2),
+ ".insn sil,$enc,$BD1,$I2", []>;
+ def InsnSS : DirectiveInsnSS<(outs),
+ (ins imm64zx48:$enc, bdraddr12only:$RBD1,
+ bdaddr12only:$BD2, AnyReg:$R3),
+ ".insn ss,$enc,$RBD1,$BD2,$R3", []>;
+ def InsnSSE : DirectiveInsnSSE<(outs),
+ (ins imm64zx48:$enc,
+                                 bdaddr12only:$BD1, bdaddr12only:$BD2),
+ ".insn sse,$enc,$BD1,$BD2", []>;
+ def InsnSSF : DirectiveInsnSSF<(outs),
+ (ins imm64zx48:$enc, bdaddr12only:$BD1,
+ bdaddr12only:$BD2, AnyReg:$R3),
+ ".insn ssf,$enc,$BD1,$BD2,$R3", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Peepholes.
+//===----------------------------------------------------------------------===//
+
+// Use AL* for GR64 additions of unsigned 32-bit values.
+defm : ZXB<add, GR64, ALGFR>;
+def : Pat<(add GR64:$src1, imm64zx32:$src2),
+ (ALGFI GR64:$src1, imm64zx32:$src2)>;
+def : Pat<(add GR64:$src1, (azextloadi32 bdxaddr20only:$addr)),
+ (ALGF GR64:$src1, bdxaddr20only:$addr)>;
+
+// Use SL* for GR64 subtractions of unsigned 32-bit values.
+defm : ZXB<sub, GR64, SLGFR>;
+def : Pat<(add GR64:$src1, imm64zx32n:$src2),
+ (SLGFI GR64:$src1, imm64zx32n:$src2)>;
+def : Pat<(sub GR64:$src1, (azextloadi32 bdxaddr20only:$addr)),
+ (SLGF GR64:$src1, bdxaddr20only:$addr)>;
+
+// Optimize sign-extended 1/0 selects to -1/0 selects. This is important
+// for vector legalization.
+def : Pat<(sra (shl (i32 (z_select_ccmask 1, 0, imm32zx4:$valid, imm32zx4:$cc)),
+ (i32 31)),
+ (i32 31)),
+ (Select32 (LHI -1), (LHI 0), imm32zx4:$valid, imm32zx4:$cc)>;
+def : Pat<(sra (shl (i64 (anyext (i32 (z_select_ccmask 1, 0, imm32zx4:$valid,
+ imm32zx4:$cc)))),
+ (i32 63)),
+ (i32 63)),
+ (Select64 (LGHI -1), (LGHI 0), imm32zx4:$valid, imm32zx4:$cc)>;
+
+// Avoid generating 2 XOR instructions. (xor (and x, y), y) is
+// equivalent to (and (xor x, -1), y)
+def : Pat<(and (xor GR64:$x, (i64 -1)), GR64:$y),
+ (XGR GR64:$y, (NGR GR64:$y, GR64:$x))>;
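+
+// The identity holds bitwise: where a bit of y is 0 both sides are 0, and
+// where it is 1 the left side is x ^ 1 = ~x, so both sides equal (~x & y);
+// the NGR/XGR pair above computes it as y ^ (y & x).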
+
+// Shift/rotate instructions only use the last 6 bits of the second operand
+// register, so we can safely use NILL (whose encoding is 16 bits shorter
+// than NILF's) to AND only the last 16 bits.
+// Complexity is added so that we match this before we match NILF on the AND
+// operation alone.
+let AddedComplexity = 4 in {
+ def : Pat<(shl GR32:$val, (and GR32:$shift, uimm32:$imm)),
+ (SLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+ def : Pat<(sra GR32:$val, (and GR32:$shift, uimm32:$imm)),
+ (SRA GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+ def : Pat<(srl GR32:$val, (and GR32:$shift, uimm32:$imm)),
+ (SRL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+ def : Pat<(shl GR64:$val, (and GR32:$shift, uimm32:$imm)),
+ (SLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+ def : Pat<(sra GR64:$val, (and GR32:$shift, uimm32:$imm)),
+ (SRAG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+ def : Pat<(srl GR64:$val, (and GR32:$shift, uimm32:$imm)),
+ (SRLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+ def : Pat<(rotl GR32:$val, (and GR32:$shift, uimm32:$imm)),
+ (RLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+ def : Pat<(rotl GR64:$val, (and GR32:$shift, uimm32:$imm)),
+ (RLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+}
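+
+// For example (illustrative): "x << (s & 63)" on a GR64 can be emitted as
+// "nill %r3, 63" followed by "sllg %r1, %r2, 0(%r3)". NILL only ANDs the
+// low 16 bits of %r3, but that is enough because SLLG reads just the last
+// 6 bits of the shift register.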
+
+// Peepholes for turning scalar operations into block operations.
+defm : BlockLoadStore<anyextloadi8, i32, MVCSequence, NCSequence, OCSequence,
+ XCSequence, 1>;
+defm : BlockLoadStore<anyextloadi16, i32, MVCSequence, NCSequence, OCSequence,
+ XCSequence, 2>;
+defm : BlockLoadStore<load, i32, MVCSequence, NCSequence, OCSequence,
+ XCSequence, 4>;
+defm : BlockLoadStore<anyextloadi8, i64, MVCSequence, NCSequence,
+ OCSequence, XCSequence, 1>;
+defm : BlockLoadStore<anyextloadi16, i64, MVCSequence, NCSequence, OCSequence,
+ XCSequence, 2>;
+defm : BlockLoadStore<anyextloadi32, i64, MVCSequence, NCSequence, OCSequence,
+ XCSequence, 4>;
+defm : BlockLoadStore<load, i64, MVCSequence, NCSequence, OCSequence,
+ XCSequence, 8>;
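+
+// For example (illustrative): an i32 loaded from one address and stored
+// unchanged to another can become a single 4-byte MVC via MVCSequence;
+// likewise, ANDing/ORing/XORing a loaded value into a memory destination
+// can become NC, OC or XC.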
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrSystem.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrSystem.td
new file mode 100644
index 000000000000..a9803c2d83e9
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrSystem.td
@@ -0,0 +1,517 @@
+//==- SystemZInstrSystem.td - SystemZ system instructions -*- tblgen-*-----==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the SystemZ system-level instructions. Most of them
+// are privileged or semi-privileged. They are not used for code generation,
+// but are provided for use with the assembler and disassembler only.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Program-Status Word Instructions.
+//===----------------------------------------------------------------------===//
+
+// Extract PSW.
+let hasSideEffects = 1, Uses = [CC] in
+ def EPSW : InherentDualRRE<"epsw", 0xB98D, GR32>;
+
+// Load PSW (extended).
+let hasSideEffects = 1, Defs = [CC], mayLoad = 1 in {
+ def LPSW : SideEffectUnaryS<"lpsw", 0x8200, null_frag, 8>;
+ def LPSWE : SideEffectUnaryS<"lpswe", 0xB2B2, null_frag, 16>;
+}
+
+// Insert PSW key.
+let Uses = [R2L], Defs = [R2L] in
+ def IPK : SideEffectInherentS<"ipk", 0xB20B, null_frag>;
+
+// Set PSW key from address.
+let hasSideEffects = 1 in
+ def SPKA : SideEffectAddressS<"spka", 0xB20A, null_frag>;
+
+// Set system mask.
+let hasSideEffects = 1, mayLoad = 1 in
+ def SSM : SideEffectUnaryS<"ssm", 0x8000, null_frag, 1>;
+
+// Store then AND/OR system mask.
+let hasSideEffects = 1 in {
+ def STNSM : StoreSI<"stnsm", 0xAC, null_frag, imm32zx8>;
+ def STOSM : StoreSI<"stosm", 0xAD, null_frag, imm32zx8>;
+}
+
+// Insert address space control.
+let hasSideEffects = 1 in
+ def IAC : InherentRRE<"iac", 0xB224, GR32, null_frag>;
+
+// Set address space control (fast).
+let hasSideEffects = 1 in {
+ def SAC : SideEffectAddressS<"sac", 0xB219, null_frag>;
+ def SACF : SideEffectAddressS<"sacf", 0xB279, null_frag>;
+}
+
+//===----------------------------------------------------------------------===//
+// Control Register Instructions.
+//===----------------------------------------------------------------------===//
+
+// Load control.
+def LCTL : LoadMultipleRS<"lctl", 0xB7, CR64>;
+def LCTLG : LoadMultipleRSY<"lctlg", 0xEB2F, CR64>;
+
+// Store control.
+def STCTL : StoreMultipleRS<"stctl", 0xB6, CR64>;
+def STCTG : StoreMultipleRSY<"stctg", 0xEB25, CR64>;
+
+// Extract primary ASN (and instance).
+let hasSideEffects = 1 in {
+ def EPAR : InherentRRE<"epar", 0xB226, GR32, null_frag>;
+ def EPAIR : InherentRRE<"epair", 0xB99A, GR64, null_frag>;
+}
+
+// Extract secondary ASN (and instance).
+let hasSideEffects = 1 in {
+ def ESAR : InherentRRE<"esar", 0xB227, GR32, null_frag>;
+ def ESAIR : InherentRRE<"esair", 0xB99B, GR64, null_frag>;
+}
+
+// Set secondary ASN (and instance).
+let hasSideEffects = 1 in {
+ def SSAR : SideEffectUnaryRRE<"ssar", 0xB225, GR32, null_frag>;
+ def SSAIR : SideEffectUnaryRRE<"ssair", 0xB99F, GR64, null_frag>;
+}
+
+// Extract and set extended authority.
+let hasSideEffects = 1 in
+ def ESEA : UnaryTiedRRE<"esea", 0xB99D, GR32>;
+
+//===----------------------------------------------------------------------===//
+// Prefix-Register Instructions.
+//===----------------------------------------------------------------------===//
+
+// Set prefix.
+let hasSideEffects = 1 in
+ def SPX : SideEffectUnaryS<"spx", 0xB210, null_frag, 4>;
+
+// Store prefix.
+let hasSideEffects = 1 in
+ def STPX : StoreInherentS<"stpx", 0xB211, null_frag, 4>;
+
+//===----------------------------------------------------------------------===//
+// Storage-Key and Real Memory Instructions.
+//===----------------------------------------------------------------------===//
+
+// Insert storage key extended.
+let hasSideEffects = 1 in
+ def ISKE : BinaryRRE<"iske", 0xB229, null_frag, GR32, GR64>;
+
+// Insert virtual storage key.
+let hasSideEffects = 1 in
+ def IVSK : BinaryRRE<"ivsk", 0xB223, null_frag, GR32, GR64>;
+
+// Set storage key extended.
+let hasSideEffects = 1, Defs = [CC] in
+ defm SSKE : SideEffectTernaryRRFcOpt<"sske", 0xB22B, GR32, GR64>;
+
+// Reset reference bit extended.
+let hasSideEffects = 1, Defs = [CC] in
+ def RRBE : SideEffectBinaryRRE<"rrbe", 0xB22A, GR32, GR64>;
+
+// Reset reference bits multiple.
+let Predicates = [FeatureResetReferenceBitsMultiple], hasSideEffects = 1 in
+ def RRBM : UnaryRRE<"rrbm", 0xB9AE, null_frag, GR64, GR64>;
+
+// Perform frame management function.
+let hasSideEffects = 1 in
+ def PFMF : SideEffectBinaryMemRRE<"pfmf", 0xB9AF, GR32, GR64>;
+
+// Test block.
+let hasSideEffects = 1, mayStore = 1, Uses = [R0D], Defs = [R0D, CC] in
+ def TB : SideEffectBinaryRRE<"tb", 0xB22C, GR64, GR64>;
+
+// Page in / out.
+let mayLoad = 1, mayStore = 1, Defs = [CC] in {
+ def PGIN : SideEffectBinaryRRE<"pgin", 0xB22E, GR64, GR64>;
+ def PGOUT : SideEffectBinaryRRE<"pgout", 0xB22F, GR64, GR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// Dynamic-Address-Translation Instructions.
+//===----------------------------------------------------------------------===//
+
+// Invalidate page table entry.
+let hasSideEffects = 1 in
+ defm IPTE : SideEffectQuaternaryRRFaOptOpt<"ipte", 0xB221, GR64, GR32, GR32>;
+
+// Invalidate DAT table entry.
+let hasSideEffects = 1 in
+ defm IDTE : SideEffectQuaternaryRRFbOpt<"idte", 0xB98E, GR64, GR64, GR64>;
+
+// Compare and replace DAT table entry.
+let Predicates = [FeatureEnhancedDAT2], hasSideEffects = 1, Defs = [CC] in
+ defm CRDTE : SideEffectQuaternaryRRFbOpt<"crdte", 0xB98F, GR128, GR128, GR64>;
+
+// Purge TLB.
+let hasSideEffects = 1 in
+ def PTLB : SideEffectInherentS<"ptlb", 0xB20D, null_frag>;
+
+// Compare and swap and purge.
+let hasSideEffects = 1, Defs = [CC] in {
+ def CSP : CmpSwapRRE<"csp", 0xB250, GR128, GR64>;
+ def CSPG : CmpSwapRRE<"cspg", 0xB98A, GR128, GR64>;
+}
+
+// Load page-table-entry address.
+let hasSideEffects = 1, Defs = [CC] in
+ def LPTEA : TernaryRRFb<"lptea", 0xB9AA, GR64, GR64, GR64>;
+
+// Load real address.
+let hasSideEffects = 1, Defs = [CC] in {
+ defm LRA : LoadAddressRXPair<"lra", 0xB1, 0xE313, null_frag>;
+ def LRAG : LoadAddressRXY<"lrag", 0xE303, null_frag, laaddr20pair>;
+}
+
+// Store real address.
+def STRAG : StoreSSE<"strag", 0xE502>;
+
+// Load using real address.
+let mayLoad = 1 in {
+ def LURA : UnaryRRE<"lura", 0xB24B, null_frag, GR32, GR64>;
+ def LURAG : UnaryRRE<"lurag", 0xB905, null_frag, GR64, GR64>;
+}
+
+// Store using real address.
+let mayStore = 1 in {
+ def STURA : SideEffectBinaryRRE<"stura", 0xB246, GR32, GR64>;
+ def STURG : SideEffectBinaryRRE<"sturg", 0xB925, GR64, GR64>;
+}
+
+// Test protection.
+let hasSideEffects = 1, Defs = [CC] in
+ def TPROT : SideEffectBinarySSE<"tprot", 0xE501>;
+
+//===----------------------------------------------------------------------===//
+// Memory-Move Instructions.
+//===----------------------------------------------------------------------===//
+
+// Move with key.
+let mayLoad = 1, mayStore = 1, Defs = [CC] in
+ def MVCK : MemoryBinarySSd<"mvck", 0xD9, GR64>;
+
+// Move to primary / secondary.
+let mayLoad = 1, mayStore = 1, Defs = [CC] in {
+ def MVCP : MemoryBinarySSd<"mvcp", 0xDA, GR64>;
+ def MVCS : MemoryBinarySSd<"mvcs", 0xDB, GR64>;
+}
+
+// Move with source / destination key.
+let mayLoad = 1, mayStore = 1, Uses = [R0L, R1L] in {
+ def MVCSK : SideEffectBinarySSE<"mvcsk", 0xE50E>;
+ def MVCDK : SideEffectBinarySSE<"mvcdk", 0xE50F>;
+}
+
+// Move with optional specifications.
+let mayLoad = 1, mayStore = 1, Uses = [R0L] in
+ def MVCOS : SideEffectTernarySSF<"mvcos", 0xC80, GR64>;
+
+// Move page.
+let mayLoad = 1, mayStore = 1, Uses = [R0L], Defs = [CC] in
+ def MVPG : SideEffectBinaryRRE<"mvpg", 0xB254, GR64, GR64>;
+
+//===----------------------------------------------------------------------===//
+// Address-Space Instructions.
+//===----------------------------------------------------------------------===//
+
+// Load address space parameters.
+let hasSideEffects = 1, Defs = [CC] in
+ def LASP : SideEffectBinarySSE<"lasp", 0xE500>;
+
+// Purge ALB.
+let hasSideEffects = 1 in
+ def PALB : SideEffectInherentRRE<"palb", 0xB248>;
+
+// Program call.
+let hasSideEffects = 1 in
+ def PC : SideEffectAddressS<"pc", 0xB218, null_frag>;
+
+// Program return.
+let hasSideEffects = 1, Defs = [CC] in
+ def PR : SideEffectInherentE<"pr", 0x0101>;
+
+// Program transfer (with instance).
+let hasSideEffects = 1 in {
+ def PT : SideEffectBinaryRRE<"pt", 0xB228, GR32, GR64>;
+ def PTI : SideEffectBinaryRRE<"pti", 0xB99E, GR64, GR64>;
+}
+
+// Resume program.
+let hasSideEffects = 1, Defs = [CC] in
+ def RP : SideEffectAddressS<"rp", 0xB277, null_frag>;
+
+// Branch in subspace group.
+let hasSideEffects = 1 in
+ def BSG : UnaryRRE<"bsg", 0xB258, null_frag, GR64, GR64>;
+
+// Branch and set authority.
+let hasSideEffects = 1 in
+ def BSA : UnaryRRE<"bsa", 0xB25A, null_frag, GR64, GR64>;
+
+// Test access.
+let Defs = [CC] in
+ def TAR : SideEffectBinaryRRE<"tar", 0xB24C, AR32, GR32>;
+
+//===----------------------------------------------------------------------===//
+// Linkage-Stack Instructions.
+//===----------------------------------------------------------------------===//
+
+// Branch and stack.
+let hasSideEffects = 1 in
+ def BAKR : SideEffectBinaryRRE<"bakr", 0xB240, GR64, GR64>;
+
+// Extract stacked registers.
+let hasSideEffects = 1 in {
+ def EREG : SideEffectBinaryRRE<"ereg", 0xB249, GR32, GR32>;
+ def EREGG : SideEffectBinaryRRE<"eregg", 0xB90E, GR64, GR64>;
+}
+
+// Extract stacked state.
+let hasSideEffects = 1, Defs = [CC] in
+ def ESTA : UnaryRRE<"esta", 0xB24A, null_frag, GR128, GR32>;
+
+// Modify stacked state.
+let hasSideEffects = 1 in
+ def MSTA : SideEffectUnaryRRE<"msta", 0xB247, GR128, null_frag>;
+
+//===----------------------------------------------------------------------===//
+// Time-Related Instructions.
+//===----------------------------------------------------------------------===//
+
+// Perform timing facility function.
+let hasSideEffects = 1, mayLoad = 1, Uses = [R0L, R1D], Defs = [CC] in
+ def PTFF : SideEffectInherentE<"ptff", 0x0104>;
+
+// Set clock.
+let hasSideEffects = 1, Defs = [CC] in
+ def SCK : SideEffectUnaryS<"sck", 0xB204, null_frag, 8>;
+
+// Set clock programmable field.
+let hasSideEffects = 1, Uses = [R0L] in
+ def SCKPF : SideEffectInherentE<"sckpf", 0x0107>;
+
+// Set clock comparator.
+let hasSideEffects = 1 in
+ def SCKC : SideEffectUnaryS<"sckc", 0xB206, null_frag, 8>;
+
+// Set CPU timer.
+let hasSideEffects = 1 in
+ def SPT : SideEffectUnaryS<"spt", 0xB208, null_frag, 8>;
+
+// Store clock (fast / extended).
+let hasSideEffects = 1, Defs = [CC] in {
+ def STCK : StoreInherentS<"stck", 0xB205, null_frag, 8>;
+ def STCKF : StoreInherentS<"stckf", 0xB27C, null_frag, 8>;
+ def STCKE : StoreInherentS<"stcke", 0xB278, null_frag, 16>;
+}
+
+// Store clock comparator.
+let hasSideEffects = 1 in
+ def STCKC : StoreInherentS<"stckc", 0xB207, null_frag, 8>;
+
+// Store CPU timer.
+let hasSideEffects = 1 in
+ def STPT : StoreInherentS<"stpt", 0xB209, null_frag, 8>;
+
+//===----------------------------------------------------------------------===//
+// CPU-Related Instructions.
+//===----------------------------------------------------------------------===//
+
+// Store CPU address.
+let hasSideEffects = 1 in
+ def STAP : StoreInherentS<"stap", 0xB212, null_frag, 2>;
+
+// Store CPU ID.
+let hasSideEffects = 1 in
+ def STIDP : StoreInherentS<"stidp", 0xB202, null_frag, 8>;
+
+// Store system information.
+let hasSideEffects = 1, Uses = [R0L, R1L], Defs = [R0L, CC] in
+ def STSI : StoreInherentS<"stsi", 0xB27D, null_frag, 0>;
+
+// Store facility list.
+let hasSideEffects = 1 in
+ def STFL : StoreInherentS<"stfl", 0xB2B1, null_frag, 4>;
+
+// Store facility list extended.
+let hasSideEffects = 1, Uses = [R0D], Defs = [R0D, CC] in
+ def STFLE : StoreInherentS<"stfle", 0xB2B0, null_frag, 0>;
+
+// Extract CPU attribute.
+let hasSideEffects = 1 in
+ def ECAG : BinaryRSY<"ecag", 0xEB4C, null_frag, GR64>;
+
+// Extract CPU time.
+let hasSideEffects = 1, mayLoad = 1, Defs = [R0D, R1D] in
+ def ECTG : SideEffectTernarySSF<"ectg", 0xC81, GR64>;
+
+// Perform topology function.
+let hasSideEffects = 1 in
+ def PTF : UnaryTiedRRE<"ptf", 0xB9A2, GR64>;
+
+// Perform cryptographic key management operation.
+let Predicates = [FeatureMessageSecurityAssist3],
+ hasSideEffects = 1, Uses = [R0L, R1D] in
+ def PCKMO : SideEffectInherentRRE<"pckmo", 0xB928>;
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//===----------------------------------------------------------------------===//
+
+// Supervisor call.
+let hasSideEffects = 1, isCall = 1, Defs = [CC] in
+ def SVC : SideEffectUnaryI<"svc", 0x0A, imm32zx8>;
+
+// Monitor call.
+let hasSideEffects = 1, isCall = 1 in
+ def MC : SideEffectBinarySI<"mc", 0xAF, imm32zx8>;
+
+// Diagnose.
+let hasSideEffects = 1, isCall = 1 in
+ def DIAG : SideEffectTernaryRS<"diag", 0x83, GR32, GR32>;
+
+// Trace.
+let hasSideEffects = 1, mayLoad = 1 in {
+ def TRACE : SideEffectTernaryRS<"trace", 0x99, GR32, GR32>;
+ def TRACG : SideEffectTernaryRSY<"tracg", 0xEB0F, GR64, GR64>;
+}
+
+// Trap.
+let hasSideEffects = 1 in {
+ def TRAP2 : SideEffectInherentE<"trap2", 0x01FF>;
+ def TRAP4 : SideEffectAddressS<"trap4", 0xB2FF, null_frag>;
+}
+
+// Signal processor.
+let hasSideEffects = 1, Defs = [CC] in
+ def SIGP : SideEffectTernaryRS<"sigp", 0xAE, GR64, GR64>;
+
+// Signal adapter.
+let hasSideEffects = 1, Uses = [R0D, R1D, R2D, R3D], Defs = [CC] in
+ def SIGA : SideEffectAddressS<"siga", 0xB274, null_frag>;
+
+// Start interpretive execution.
+let hasSideEffects = 1, Defs = [CC] in
+ def SIE : SideEffectUnaryS<"sie", 0xB214, null_frag, 0>;
+
+//===----------------------------------------------------------------------===//
+// CPU-Measurement Facility Instructions (SA23-2260).
+//===----------------------------------------------------------------------===//
+
+// Load program parameter.
+let hasSideEffects = 1 in
+ def LPP : SideEffectUnaryS<"lpp", 0xB280, null_frag, 8>;
+
+// Extract coprocessor-group address.
+let hasSideEffects = 1, Defs = [CC] in
+ def ECPGA : UnaryRRE<"ecpga", 0xB2ED, null_frag, GR32, GR64>;
+
+// Extract CPU counter.
+let hasSideEffects = 1, Defs = [CC] in
+ def ECCTR : UnaryRRE<"ecctr", 0xB2E4, null_frag, GR64, GR64>;
+
+// Extract peripheral counter.
+let hasSideEffects = 1, Defs = [CC] in
+ def EPCTR : UnaryRRE<"epctr", 0xB2E5, null_frag, GR64, GR64>;
+
+// Load CPU-counter-set controls.
+let hasSideEffects = 1, Defs = [CC] in
+ def LCCTL : SideEffectUnaryS<"lcctl", 0xB284, null_frag, 8>;
+
+// Load peripheral-counter-set controls.
+let hasSideEffects = 1, Defs = [CC] in
+ def LPCTL : SideEffectUnaryS<"lpctl", 0xB285, null_frag, 8>;
+
+// Load sampling controls.
+let hasSideEffects = 1, Defs = [CC] in
+ def LSCTL : SideEffectUnaryS<"lsctl", 0xB287, null_frag, 0>;
+
+// Query sampling information.
+let hasSideEffects = 1 in
+ def QSI : StoreInherentS<"qsi", 0xB286, null_frag, 0>;
+
+// Query counter information.
+let hasSideEffects = 1 in
+ def QCTRI : StoreInherentS<"qctri", 0xB28E, null_frag, 0>;
+
+// Set CPU counter.
+let hasSideEffects = 1, Defs = [CC] in
+ def SCCTR : SideEffectBinaryRRE<"scctr", 0xB2E0, GR64, GR64>;
+
+// Set peripheral counter.
+let hasSideEffects = 1, Defs = [CC] in
+ def SPCTR : SideEffectBinaryRRE<"spctr", 0xB2E1, GR64, GR64>;
+
+//===----------------------------------------------------------------------===//
+// I/O Instructions (Principles of Operation, Chapter 14).
+//===----------------------------------------------------------------------===//
+
+// Clear subchannel.
+let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in
+ def CSCH : SideEffectInherentS<"csch", 0xB230, null_frag>;
+
+// Halt subchannel.
+let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in
+ def HSCH : SideEffectInherentS<"hsch", 0xB231, null_frag>;
+
+// Modify subchannel.
+let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in
+ def MSCH : SideEffectUnaryS<"msch", 0xB232, null_frag, 0>;
+
+// Resume subchannel.
+let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in
+ def RSCH : SideEffectInherentS<"rsch", 0xB238, null_frag>;
+
+// Start subchannel.
+let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in
+ def SSCH : SideEffectUnaryS<"ssch", 0xB233, null_frag, 0>;
+
+// Store subchannel.
+let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in
+ def STSCH : StoreInherentS<"stsch", 0xB234, null_frag, 0>;
+
+// Test subchannel.
+let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in
+ def TSCH : StoreInherentS<"tsch", 0xB235, null_frag, 0>;
+
+// Cancel subchannel.
+let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in
+ def XSCH : SideEffectInherentS<"xsch", 0xB276, null_frag>;
+
+// Reset channel path.
+let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in
+ def RCHP : SideEffectInherentS<"rchp", 0xB23B, null_frag>;
+
+// Set channel monitor.
+let hasSideEffects = 1, mayLoad = 1, Uses = [R1L, R2D] in
+ def SCHM : SideEffectInherentS<"schm", 0xB23C, null_frag>;
+
+// Store channel path status.
+let hasSideEffects = 1 in
+ def STCPS : StoreInherentS<"stcps", 0xB23A, null_frag, 0>;
+
+// Store channel report word.
+let hasSideEffects = 1, Defs = [CC] in
+ def STCRW : StoreInherentS<"stcrw", 0xB239, null_frag, 0>;
+
+// Test pending interruption.
+let hasSideEffects = 1, Defs = [CC] in
+ def TPI : StoreInherentS<"tpi", 0xB236, null_frag, 0>;
+
+// Set address limit.
+let hasSideEffects = 1, Uses = [R1L] in
+ def SAL : SideEffectInherentS<"sal", 0xB237, null_frag>;
+
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td
new file mode 100644
index 000000000000..0158fe6aec08
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td
@@ -0,0 +1,1204 @@
+//==- SystemZInstrVector.td - SystemZ Vector instructions ------*- tblgen-*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Move instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureVector] in {
+ // Register move.
+ def VLR : UnaryVRRa<"vlr", 0xE756, null_frag, v128any, v128any>;
+ def VLR32 : UnaryAliasVRR<null_frag, v32eb, v32eb>;
+ def VLR64 : UnaryAliasVRR<null_frag, v64db, v64db>;
+
+ // Load GR from VR element.
+ def VLGV : BinaryVRScGeneric<"vlgv", 0xE721>;
+ def VLGVB : BinaryVRSc<"vlgvb", 0xE721, null_frag, v128b, 0>;
+ def VLGVH : BinaryVRSc<"vlgvh", 0xE721, null_frag, v128h, 1>;
+ def VLGVF : BinaryVRSc<"vlgvf", 0xE721, null_frag, v128f, 2>;
+ def VLGVG : BinaryVRSc<"vlgvg", 0xE721, z_vector_extract, v128g, 3>;
+
+ // Load VR element from GR.
+ def VLVG : TernaryVRSbGeneric<"vlvg", 0xE722>;
+ def VLVGB : TernaryVRSb<"vlvgb", 0xE722, z_vector_insert,
+ v128b, v128b, GR32, 0>;
+ def VLVGH : TernaryVRSb<"vlvgh", 0xE722, z_vector_insert,
+ v128h, v128h, GR32, 1>;
+ def VLVGF : TernaryVRSb<"vlvgf", 0xE722, z_vector_insert,
+ v128f, v128f, GR32, 2>;
+ def VLVGG : TernaryVRSb<"vlvgg", 0xE722, z_vector_insert,
+ v128g, v128g, GR64, 3>;
+
+ // Load VR from GRs disjoint.
+ def VLVGP : BinaryVRRf<"vlvgp", 0xE762, z_join_dwords, v128g>;
+ def VLVGP32 : BinaryAliasVRRf<GR32>;
+}
+
+// Extractions always assign to the full GR64, even if the element would
+// fit in the lower 32 bits. Sub-i64 extracts therefore need to take a
+// subreg of the result.
+class VectorExtractSubreg<ValueType type, Instruction insn>
+ : Pat<(i32 (z_vector_extract (type VR128:$vec), shift12only:$index)),
+ (EXTRACT_SUBREG (insn VR128:$vec, shift12only:$index), subreg_l32)>;
+
+def : VectorExtractSubreg<v16i8, VLGVB>;
+def : VectorExtractSubreg<v8i16, VLGVH>;
+def : VectorExtractSubreg<v4i32, VLGVF>;
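+
+// For example (illustrative): extracting element 2 of a v4i32 becomes
+// "vlgvf %r2, %v24, 2", which writes the full GR64; the i32 result is
+// then taken as the subreg_l32 of that register.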
+
+//===----------------------------------------------------------------------===//
+// Immediate instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureVector] in {
+ let hasSideEffects = 0, isAsCheapAsAMove = 1, isMoveImm = 1,
+ isReMaterializable = 1 in {
+
+ // Generate byte mask.
+ def VZERO : InherentVRIa<"vzero", 0xE744, 0>;
+ def VONE : InherentVRIa<"vone", 0xE744, 0xffff>;
+ def VGBM : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16>;
+
+ // Generate mask.
+ def VGM : BinaryVRIbGeneric<"vgm", 0xE746>;
+ def VGMB : BinaryVRIb<"vgmb", 0xE746, z_rotate_mask, v128b, 0>;
+ def VGMH : BinaryVRIb<"vgmh", 0xE746, z_rotate_mask, v128h, 1>;
+ def VGMF : BinaryVRIb<"vgmf", 0xE746, z_rotate_mask, v128f, 2>;
+ def VGMG : BinaryVRIb<"vgmg", 0xE746, z_rotate_mask, v128g, 3>;
+
+ // Replicate immediate.
+ def VREPI : UnaryVRIaGeneric<"vrepi", 0xE745, imm32sx16>;
+ def VREPIB : UnaryVRIa<"vrepib", 0xE745, z_replicate, v128b, imm32sx16, 0>;
+ def VREPIH : UnaryVRIa<"vrepih", 0xE745, z_replicate, v128h, imm32sx16, 1>;
+ def VREPIF : UnaryVRIa<"vrepif", 0xE745, z_replicate, v128f, imm32sx16, 2>;
+ def VREPIG : UnaryVRIa<"vrepig", 0xE745, z_replicate, v128g, imm32sx16, 3>;
+ }
+
+ // Load element immediate.
+ //
+ // We want these instructions to be used ahead of VLVG* where possible.
+ // However, VLVG* takes a variable BD-format index whereas VLEI takes
+ // a plain immediate index. This means that VLVG* has an extra "base"
+ // register operand and is 3 units more complex. Bumping the complexity
+ // of the VLEI* instructions by 4 means that they are strictly better
+ // than VLVG* in cases where both forms match.
+ let AddedComplexity = 4 in {
+ def VLEIB : TernaryVRIa<"vleib", 0xE740, z_vector_insert,
+ v128b, v128b, imm32sx16trunc, imm32zx4>;
+ def VLEIH : TernaryVRIa<"vleih", 0xE741, z_vector_insert,
+ v128h, v128h, imm32sx16trunc, imm32zx3>;
+ def VLEIF : TernaryVRIa<"vleif", 0xE743, z_vector_insert,
+ v128f, v128f, imm32sx16, imm32zx2>;
+ def VLEIG : TernaryVRIa<"vleig", 0xE742, z_vector_insert,
+ v128g, v128g, imm64sx16, imm32zx1>;
+ }
+}
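+
+// To make the complexity arithmetic above concrete (a sketch): TableGen
+// scores each pattern as its size plus AddedComplexity, and the extra base
+// register in VLVG*'s BD-format operand accounts for the 3 units mentioned
+// above, so the +4 on VLEI* wins whenever the immediate fits in 16 bits.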
+
+//===----------------------------------------------------------------------===//
+// Loads
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureVector] in {
+ // Load.
+ def VL : UnaryVRX<"vl", 0xE706, null_frag, v128any, 16>;
+
+ // Load to block boundary. The number of loaded bytes is only known
+ // at run time. The instruction is really polymorphic, but v128b matches
+ // the return type of the associated intrinsic.
+ def VLBB : BinaryVRX<"vlbb", 0xE707, int_s390_vlbb, v128b, 0>;
+
+ // Load count to block boundary.
+ let Defs = [CC] in
+ def LCBB : InstRXE<0xE727, (outs GR32:$R1),
+ (ins bdxaddr12only:$XBD2, imm32zx4:$M3),
+ "lcbb\t$R1, $XBD2, $M3",
+ [(set GR32:$R1, (int_s390_lcbb bdxaddr12only:$XBD2,
+ imm32zx4:$M3))]>;
+
+ // Load with length. The number of loaded bytes is only known at run time.
+ def VLL : BinaryVRSb<"vll", 0xE737, int_s390_vll, 0>;
+
+ // Load multiple.
+ def VLM : LoadMultipleVRSa<"vlm", 0xE736>;
+
+ // Load and replicate.
+ def VLREP : UnaryVRXGeneric<"vlrep", 0xE705>;
+ def VLREPB : UnaryVRX<"vlrepb", 0xE705, z_replicate_loadi8, v128b, 1, 0>;
+ def VLREPH : UnaryVRX<"vlreph", 0xE705, z_replicate_loadi16, v128h, 2, 1>;
+ def VLREPF : UnaryVRX<"vlrepf", 0xE705, z_replicate_loadi32, v128f, 4, 2>;
+ def VLREPG : UnaryVRX<"vlrepg", 0xE705, z_replicate_loadi64, v128g, 8, 3>;
+ def : Pat<(v4f32 (z_replicate_loadf32 bdxaddr12only:$addr)),
+ (VLREPF bdxaddr12only:$addr)>;
+ def : Pat<(v2f64 (z_replicate_loadf64 bdxaddr12only:$addr)),
+ (VLREPG bdxaddr12only:$addr)>;
+
+ // Use VLREP to load subvectors. These patterns use "12pair" because
+ // LEY and LDY offer full 20-bit displacement fields. When a displacement
+ // needs more than 12 bits, it is usually better to use those instructions
+ // than to force the address into a GPR temporary.
+ def VL32 : UnaryAliasVRX<load, v32eb, bdxaddr12pair>;
+ def VL64 : UnaryAliasVRX<load, v64db, bdxaddr12pair>;
+
+ // Load logical element and zero.
+ def VLLEZ : UnaryVRXGeneric<"vllez", 0xE704>;
+ def VLLEZB : UnaryVRX<"vllezb", 0xE704, z_vllezi8, v128b, 1, 0>;
+ def VLLEZH : UnaryVRX<"vllezh", 0xE704, z_vllezi16, v128h, 2, 1>;
+ def VLLEZF : UnaryVRX<"vllezf", 0xE704, z_vllezi32, v128f, 4, 2>;
+ def VLLEZG : UnaryVRX<"vllezg", 0xE704, z_vllezi64, v128g, 8, 3>;
+ def : Pat<(v4f32 (z_vllezf32 bdxaddr12only:$addr)),
+ (VLLEZF bdxaddr12only:$addr)>;
+ def : Pat<(v2f64 (z_vllezf64 bdxaddr12only:$addr)),
+ (VLLEZG bdxaddr12only:$addr)>;
+
+ // Load element.
+ def VLEB : TernaryVRX<"vleb", 0xE700, z_vlei8, v128b, v128b, 1, imm32zx4>;
+ def VLEH : TernaryVRX<"vleh", 0xE701, z_vlei16, v128h, v128h, 2, imm32zx3>;
+ def VLEF : TernaryVRX<"vlef", 0xE703, z_vlei32, v128f, v128f, 4, imm32zx2>;
+ def VLEG : TernaryVRX<"vleg", 0xE702, z_vlei64, v128g, v128g, 8, imm32zx1>;
+ def : Pat<(z_vlef32 (v4f32 VR128:$val), bdxaddr12only:$addr, imm32zx2:$index),
+ (VLEF VR128:$val, bdxaddr12only:$addr, imm32zx2:$index)>;
+ def : Pat<(z_vlef64 (v2f64 VR128:$val), bdxaddr12only:$addr, imm32zx1:$index),
+ (VLEG VR128:$val, bdxaddr12only:$addr, imm32zx1:$index)>;
+
+ // Gather element.
+ def VGEF : TernaryVRV<"vgef", 0xE713, 4, imm32zx2>;
+ def VGEG : TernaryVRV<"vgeg", 0xE712, 8, imm32zx1>;
+}
+
+// Use replicating loads if we're inserting a single element into an
+// undefined vector. This avoids a false dependency on the previous
+// register contents.
+multiclass ReplicatePeephole<Instruction vlrep, ValueType vectype,
+ SDPatternOperator load, ValueType scalartype> {
+ def : Pat<(vectype (z_vector_insert
+ (undef), (scalartype (load bdxaddr12only:$addr)), 0)),
+ (vlrep bdxaddr12only:$addr)>;
+ def : Pat<(vectype (scalar_to_vector
+ (scalartype (load bdxaddr12only:$addr)))),
+ (vlrep bdxaddr12only:$addr)>;
+}
+defm : ReplicatePeephole<VLREPB, v16i8, anyextloadi8, i32>;
+defm : ReplicatePeephole<VLREPH, v8i16, anyextloadi16, i32>;
+defm : ReplicatePeephole<VLREPF, v4i32, load, i32>;
+defm : ReplicatePeephole<VLREPG, v2i64, load, i64>;
+defm : ReplicatePeephole<VLREPF, v4f32, load, f32>;
+defm : ReplicatePeephole<VLREPG, v2f64, load, f64>;
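+
+// For example (sketch): a scalar load feeding scalar_to_vector, such as
+//   (v4f32 (scalar_to_vector (f32 (load %addr)))),
+// becomes a single "vlrepf" instead of a load plus element insert, so no
+// stale vector register contents flow into the result.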
+
+//===----------------------------------------------------------------------===//
+// Stores
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureVector] in {
+ // Store.
+ def VST : StoreVRX<"vst", 0xE70E, null_frag, v128any, 16>;
+
+ // Store with length. The number of stored bytes is only known at run time.
+ def VSTL : StoreLengthVRSb<"vstl", 0xE73F, int_s390_vstl, 0>;
+
+ // Store multiple.
+ def VSTM : StoreMultipleVRSa<"vstm", 0xE73E>;
+
+ // Store element.
+ def VSTEB : StoreBinaryVRX<"vsteb", 0xE708, z_vstei8, v128b, 1, imm32zx4>;
+ def VSTEH : StoreBinaryVRX<"vsteh", 0xE709, z_vstei16, v128h, 2, imm32zx3>;
+ def VSTEF : StoreBinaryVRX<"vstef", 0xE70B, z_vstei32, v128f, 4, imm32zx2>;
+ def VSTEG : StoreBinaryVRX<"vsteg", 0xE70A, z_vstei64, v128g, 8, imm32zx1>;
+ def : Pat<(z_vstef32 (v4f32 VR128:$val), bdxaddr12only:$addr,
+ imm32zx2:$index),
+ (VSTEF VR128:$val, bdxaddr12only:$addr, imm32zx2:$index)>;
+ def : Pat<(z_vstef64 (v2f64 VR128:$val), bdxaddr12only:$addr,
+ imm32zx1:$index),
+ (VSTEG VR128:$val, bdxaddr12only:$addr, imm32zx1:$index)>;
+
+ // Use VSTE to store subvectors. These patterns use "12pair" because
+ // STEY and STDY offer full 20-bit displacement fields. When a displacement
+ // needs more than 12 bits, it is usually better to use those instructions
+ // than to force the address into a GPR temporary.
+ def VST32 : StoreAliasVRX<store, v32eb, bdxaddr12pair>;
+ def VST64 : StoreAliasVRX<store, v64db, bdxaddr12pair>;
+
+ // Scatter element.
+ def VSCEF : StoreBinaryVRV<"vscef", 0xE71B, 4, imm32zx2>;
+ def VSCEG : StoreBinaryVRV<"vsceg", 0xE71A, 8, imm32zx1>;
+}
+
+//===----------------------------------------------------------------------===//
+// Selects and permutes
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureVector] in {
+ // Merge high.
+ def VMRH: BinaryVRRcGeneric<"vmrh", 0xE761>;
+ def VMRHB : BinaryVRRc<"vmrhb", 0xE761, z_merge_high, v128b, v128b, 0>;
+ def VMRHH : BinaryVRRc<"vmrhh", 0xE761, z_merge_high, v128h, v128h, 1>;
+ def VMRHF : BinaryVRRc<"vmrhf", 0xE761, z_merge_high, v128f, v128f, 2>;
+ def VMRHG : BinaryVRRc<"vmrhg", 0xE761, z_merge_high, v128g, v128g, 3>;
+ def : BinaryRRWithType<VMRHF, VR128, z_merge_high, v4f32>;
+ def : BinaryRRWithType<VMRHG, VR128, z_merge_high, v2f64>;
+
+ // Merge low.
+ def VMRL: BinaryVRRcGeneric<"vmrl", 0xE760>;
+ def VMRLB : BinaryVRRc<"vmrlb", 0xE760, z_merge_low, v128b, v128b, 0>;
+ def VMRLH : BinaryVRRc<"vmrlh", 0xE760, z_merge_low, v128h, v128h, 1>;
+ def VMRLF : BinaryVRRc<"vmrlf", 0xE760, z_merge_low, v128f, v128f, 2>;
+ def VMRLG : BinaryVRRc<"vmrlg", 0xE760, z_merge_low, v128g, v128g, 3>;
+ def : BinaryRRWithType<VMRLF, VR128, z_merge_low, v4f32>;
+ def : BinaryRRWithType<VMRLG, VR128, z_merge_low, v2f64>;
+
+ // Permute.
+ def VPERM : TernaryVRRe<"vperm", 0xE78C, z_permute, v128b, v128b>;
+
+ // Permute doubleword immediate.
+ def VPDI : TernaryVRRc<"vpdi", 0xE784, z_permute_dwords, v128g, v128g>;
+
+ // Replicate.
+ def VREP: BinaryVRIcGeneric<"vrep", 0xE74D>;
+ def VREPB : BinaryVRIc<"vrepb", 0xE74D, z_splat, v128b, v128b, 0>;
+ def VREPH : BinaryVRIc<"vreph", 0xE74D, z_splat, v128h, v128h, 1>;
+ def VREPF : BinaryVRIc<"vrepf", 0xE74D, z_splat, v128f, v128f, 2>;
+ def VREPG : BinaryVRIc<"vrepg", 0xE74D, z_splat, v128g, v128g, 3>;
+ def : Pat<(v4f32 (z_splat VR128:$vec, imm32zx16:$index)),
+ (VREPF VR128:$vec, imm32zx16:$index)>;
+ def : Pat<(v2f64 (z_splat VR128:$vec, imm32zx16:$index)),
+ (VREPG VR128:$vec, imm32zx16:$index)>;
+
+ // Select.
+ def VSEL : TernaryVRRe<"vsel", 0xE78D, null_frag, v128any, v128any>;
+}
+
+//===----------------------------------------------------------------------===//
+// Widening and narrowing
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureVector] in {
+ // Pack.
+ def VPK : BinaryVRRcGeneric<"vpk", 0xE794>;
+ def VPKH : BinaryVRRc<"vpkh", 0xE794, z_pack, v128b, v128h, 1>;
+ def VPKF : BinaryVRRc<"vpkf", 0xE794, z_pack, v128h, v128f, 2>;
+ def VPKG : BinaryVRRc<"vpkg", 0xE794, z_pack, v128f, v128g, 3>;
+
+ // Pack saturate.
+ def VPKS : BinaryVRRbSPairGeneric<"vpks", 0xE797>;
+ defm VPKSH : BinaryVRRbSPair<"vpksh", 0xE797, int_s390_vpksh, z_packs_cc,
+ v128b, v128h, 1>;
+ defm VPKSF : BinaryVRRbSPair<"vpksf", 0xE797, int_s390_vpksf, z_packs_cc,
+ v128h, v128f, 2>;
+ defm VPKSG : BinaryVRRbSPair<"vpksg", 0xE797, int_s390_vpksg, z_packs_cc,
+ v128f, v128g, 3>;
+
+ // Pack saturate logical.
+ def VPKLS : BinaryVRRbSPairGeneric<"vpkls", 0xE795>;
+ defm VPKLSH : BinaryVRRbSPair<"vpklsh", 0xE795, int_s390_vpklsh, z_packls_cc,
+ v128b, v128h, 1>;
+ defm VPKLSF : BinaryVRRbSPair<"vpklsf", 0xE795, int_s390_vpklsf, z_packls_cc,
+ v128h, v128f, 2>;
+ defm VPKLSG : BinaryVRRbSPair<"vpklsg", 0xE795, int_s390_vpklsg, z_packls_cc,
+ v128f, v128g, 3>;
+
+ // Sign-extend to doubleword.
+ def VSEG : UnaryVRRaGeneric<"vseg", 0xE75F>;
+ def VSEGB : UnaryVRRa<"vsegb", 0xE75F, z_vsei8, v128g, v128g, 0>;
+ def VSEGH : UnaryVRRa<"vsegh", 0xE75F, z_vsei16, v128g, v128g, 1>;
+ def VSEGF : UnaryVRRa<"vsegf", 0xE75F, z_vsei32, v128g, v128g, 2>;
+ def : Pat<(z_vsei8_by_parts (v16i8 VR128:$src)), (VSEGB VR128:$src)>;
+ def : Pat<(z_vsei16_by_parts (v8i16 VR128:$src)), (VSEGH VR128:$src)>;
+ def : Pat<(z_vsei32_by_parts (v4i32 VR128:$src)), (VSEGF VR128:$src)>;
+
+ // Unpack high.
+ def VUPH : UnaryVRRaGeneric<"vuph", 0xE7D7>;
+ def VUPHB : UnaryVRRa<"vuphb", 0xE7D7, z_unpack_high, v128h, v128b, 0>;
+ def VUPHH : UnaryVRRa<"vuphh", 0xE7D7, z_unpack_high, v128f, v128h, 1>;
+ def VUPHF : UnaryVRRa<"vuphf", 0xE7D7, z_unpack_high, v128g, v128f, 2>;
+
+ // Unpack logical high.
+ def VUPLH : UnaryVRRaGeneric<"vuplh", 0xE7D5>;
+ def VUPLHB : UnaryVRRa<"vuplhb", 0xE7D5, z_unpackl_high, v128h, v128b, 0>;
+ def VUPLHH : UnaryVRRa<"vuplhh", 0xE7D5, z_unpackl_high, v128f, v128h, 1>;
+ def VUPLHF : UnaryVRRa<"vuplhf", 0xE7D5, z_unpackl_high, v128g, v128f, 2>;
+
+ // Unpack low.
+ def VUPL : UnaryVRRaGeneric<"vupl", 0xE7D6>;
+ def VUPLB : UnaryVRRa<"vuplb", 0xE7D6, z_unpack_low, v128h, v128b, 0>;
+ def VUPLHW : UnaryVRRa<"vuplhw", 0xE7D6, z_unpack_low, v128f, v128h, 1>;
+ def VUPLF : UnaryVRRa<"vuplf", 0xE7D6, z_unpack_low, v128g, v128f, 2>;
+
+ // Unpack logical low.
+ def VUPLL : UnaryVRRaGeneric<"vupll", 0xE7D4>;
+ def VUPLLB : UnaryVRRa<"vupllb", 0xE7D4, z_unpackl_low, v128h, v128b, 0>;
+ def VUPLLH : UnaryVRRa<"vupllh", 0xE7D4, z_unpackl_low, v128f, v128h, 1>;
+ def VUPLLF : UnaryVRRa<"vupllf", 0xE7D4, z_unpackl_low, v128g, v128f, 2>;
+}
+
+//===----------------------------------------------------------------------===//
+// Instantiating generic operations for specific types.
+//===----------------------------------------------------------------------===//
+
+multiclass GenericVectorOps<ValueType type, ValueType inttype> {
+ let Predicates = [FeatureVector] in {
+ def : Pat<(type (load bdxaddr12only:$addr)),
+ (VL bdxaddr12only:$addr)>;
+ def : Pat<(store (type VR128:$src), bdxaddr12only:$addr),
+ (VST VR128:$src, bdxaddr12only:$addr)>;
+ def : Pat<(type (vselect (inttype VR128:$x), VR128:$y, VR128:$z)),
+ (VSEL VR128:$y, VR128:$z, VR128:$x)>;
+ def : Pat<(type (vselect (inttype (z_vnot VR128:$x)), VR128:$y, VR128:$z)),
+ (VSEL VR128:$z, VR128:$y, VR128:$x)>;
+ }
+}
+
+defm : GenericVectorOps<v16i8, v16i8>;
+defm : GenericVectorOps<v8i16, v8i16>;
+defm : GenericVectorOps<v4i32, v4i32>;
+defm : GenericVectorOps<v2i64, v2i64>;
+defm : GenericVectorOps<v4f32, v4i32>;
+defm : GenericVectorOps<v2f64, v2i64>;
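+
+// Note the operand rotation in the vselect patterns above: the mask is the
+// first DAG operand but the last instruction operand, so (vselect mask, y, z)
+// emits (sketch) "vsel %v24, %v26, %v28, %v30" with the mask as the final
+// source register.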
+
+//===----------------------------------------------------------------------===//
+// Integer arithmetic
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureVector] in {
+ // Add.
+ def VA : BinaryVRRcGeneric<"va", 0xE7F3>;
+ def VAB : BinaryVRRc<"vab", 0xE7F3, add, v128b, v128b, 0>;
+ def VAH : BinaryVRRc<"vah", 0xE7F3, add, v128h, v128h, 1>;
+ def VAF : BinaryVRRc<"vaf", 0xE7F3, add, v128f, v128f, 2>;
+ def VAG : BinaryVRRc<"vag", 0xE7F3, add, v128g, v128g, 3>;
+ def VAQ : BinaryVRRc<"vaq", 0xE7F3, int_s390_vaq, v128q, v128q, 4>;
+
+ // Add compute carry.
+ def VACC : BinaryVRRcGeneric<"vacc", 0xE7F1>;
+ def VACCB : BinaryVRRc<"vaccb", 0xE7F1, int_s390_vaccb, v128b, v128b, 0>;
+ def VACCH : BinaryVRRc<"vacch", 0xE7F1, int_s390_vacch, v128h, v128h, 1>;
+ def VACCF : BinaryVRRc<"vaccf", 0xE7F1, int_s390_vaccf, v128f, v128f, 2>;
+ def VACCG : BinaryVRRc<"vaccg", 0xE7F1, int_s390_vaccg, v128g, v128g, 3>;
+ def VACCQ : BinaryVRRc<"vaccq", 0xE7F1, int_s390_vaccq, v128q, v128q, 4>;
+
+ // Add with carry.
+ def VAC : TernaryVRRdGeneric<"vac", 0xE7BB>;
+ def VACQ : TernaryVRRd<"vacq", 0xE7BB, int_s390_vacq, v128q, v128q, 4>;
+
+ // Add with carry compute carry.
+ def VACCC : TernaryVRRdGeneric<"vaccc", 0xE7B9>;
+ def VACCCQ : TernaryVRRd<"vacccq", 0xE7B9, int_s390_vacccq, v128q, v128q, 4>;
+
+ // And.
+ def VN : BinaryVRRc<"vn", 0xE768, null_frag, v128any, v128any>;
+
+ // And with complement.
+ def VNC : BinaryVRRc<"vnc", 0xE769, null_frag, v128any, v128any>;
+
+ // Average.
+ def VAVG : BinaryVRRcGeneric<"vavg", 0xE7F2>;
+ def VAVGB : BinaryVRRc<"vavgb", 0xE7F2, int_s390_vavgb, v128b, v128b, 0>;
+ def VAVGH : BinaryVRRc<"vavgh", 0xE7F2, int_s390_vavgh, v128h, v128h, 1>;
+ def VAVGF : BinaryVRRc<"vavgf", 0xE7F2, int_s390_vavgf, v128f, v128f, 2>;
+ def VAVGG : BinaryVRRc<"vavgg", 0xE7F2, int_s390_vavgg, v128g, v128g, 3>;
+
+ // Average logical.
+ def VAVGL : BinaryVRRcGeneric<"vavgl", 0xE7F0>;
+ def VAVGLB : BinaryVRRc<"vavglb", 0xE7F0, int_s390_vavglb, v128b, v128b, 0>;
+ def VAVGLH : BinaryVRRc<"vavglh", 0xE7F0, int_s390_vavglh, v128h, v128h, 1>;
+ def VAVGLF : BinaryVRRc<"vavglf", 0xE7F0, int_s390_vavglf, v128f, v128f, 2>;
+ def VAVGLG : BinaryVRRc<"vavglg", 0xE7F0, int_s390_vavglg, v128g, v128g, 3>;
+
+ // Checksum.
+ def VCKSM : BinaryVRRc<"vcksm", 0xE766, int_s390_vcksm, v128f, v128f>;
+
+ // Count leading zeros.
+ def VCLZ : UnaryVRRaGeneric<"vclz", 0xE753>;
+ def VCLZB : UnaryVRRa<"vclzb", 0xE753, ctlz, v128b, v128b, 0>;
+ def VCLZH : UnaryVRRa<"vclzh", 0xE753, ctlz, v128h, v128h, 1>;
+ def VCLZF : UnaryVRRa<"vclzf", 0xE753, ctlz, v128f, v128f, 2>;
+ def VCLZG : UnaryVRRa<"vclzg", 0xE753, ctlz, v128g, v128g, 3>;
+
+ // Count trailing zeros.
+ def VCTZ : UnaryVRRaGeneric<"vctz", 0xE752>;
+ def VCTZB : UnaryVRRa<"vctzb", 0xE752, cttz, v128b, v128b, 0>;
+ def VCTZH : UnaryVRRa<"vctzh", 0xE752, cttz, v128h, v128h, 1>;
+ def VCTZF : UnaryVRRa<"vctzf", 0xE752, cttz, v128f, v128f, 2>;
+ def VCTZG : UnaryVRRa<"vctzg", 0xE752, cttz, v128g, v128g, 3>;
+
+ // Exclusive or.
+ def VX : BinaryVRRc<"vx", 0xE76D, null_frag, v128any, v128any>;
+
+ // Galois field multiply sum.
+ def VGFM : BinaryVRRcGeneric<"vgfm", 0xE7B4>;
+ def VGFMB : BinaryVRRc<"vgfmb", 0xE7B4, int_s390_vgfmb, v128h, v128b, 0>;
+ def VGFMH : BinaryVRRc<"vgfmh", 0xE7B4, int_s390_vgfmh, v128f, v128h, 1>;
+ def VGFMF : BinaryVRRc<"vgfmf", 0xE7B4, int_s390_vgfmf, v128g, v128f, 2>;
+ def VGFMG : BinaryVRRc<"vgfmg", 0xE7B4, int_s390_vgfmg, v128q, v128g, 3>;
+
+ // Galois field multiply sum and accumulate.
+ def VGFMA : TernaryVRRdGeneric<"vgfma", 0xE7BC>;
+ def VGFMAB : TernaryVRRd<"vgfmab", 0xE7BC, int_s390_vgfmab, v128h, v128b, 0>;
+ def VGFMAH : TernaryVRRd<"vgfmah", 0xE7BC, int_s390_vgfmah, v128f, v128h, 1>;
+ def VGFMAF : TernaryVRRd<"vgfmaf", 0xE7BC, int_s390_vgfmaf, v128g, v128f, 2>;
+ def VGFMAG : TernaryVRRd<"vgfmag", 0xE7BC, int_s390_vgfmag, v128q, v128g, 3>;
+
+ // Load complement.
+ def VLC : UnaryVRRaGeneric<"vlc", 0xE7DE>;
+ def VLCB : UnaryVRRa<"vlcb", 0xE7DE, z_vneg, v128b, v128b, 0>;
+ def VLCH : UnaryVRRa<"vlch", 0xE7DE, z_vneg, v128h, v128h, 1>;
+ def VLCF : UnaryVRRa<"vlcf", 0xE7DE, z_vneg, v128f, v128f, 2>;
+ def VLCG : UnaryVRRa<"vlcg", 0xE7DE, z_vneg, v128g, v128g, 3>;
+
+ // Load positive.
+ def VLP : UnaryVRRaGeneric<"vlp", 0xE7DF>;
+ def VLPB : UnaryVRRa<"vlpb", 0xE7DF, z_viabs8, v128b, v128b, 0>;
+ def VLPH : UnaryVRRa<"vlph", 0xE7DF, z_viabs16, v128h, v128h, 1>;
+ def VLPF : UnaryVRRa<"vlpf", 0xE7DF, z_viabs32, v128f, v128f, 2>;
+ def VLPG : UnaryVRRa<"vlpg", 0xE7DF, z_viabs64, v128g, v128g, 3>;
+
+ // Maximum.
+ def VMX : BinaryVRRcGeneric<"vmx", 0xE7FF>;
+ def VMXB : BinaryVRRc<"vmxb", 0xE7FF, null_frag, v128b, v128b, 0>;
+ def VMXH : BinaryVRRc<"vmxh", 0xE7FF, null_frag, v128h, v128h, 1>;
+ def VMXF : BinaryVRRc<"vmxf", 0xE7FF, null_frag, v128f, v128f, 2>;
+ def VMXG : BinaryVRRc<"vmxg", 0xE7FF, null_frag, v128g, v128g, 3>;
+
+ // Maximum logical.
+ def VMXL : BinaryVRRcGeneric<"vmxl", 0xE7FD>;
+ def VMXLB : BinaryVRRc<"vmxlb", 0xE7FD, null_frag, v128b, v128b, 0>;
+ def VMXLH : BinaryVRRc<"vmxlh", 0xE7FD, null_frag, v128h, v128h, 1>;
+ def VMXLF : BinaryVRRc<"vmxlf", 0xE7FD, null_frag, v128f, v128f, 2>;
+ def VMXLG : BinaryVRRc<"vmxlg", 0xE7FD, null_frag, v128g, v128g, 3>;
+
+ // Minimum.
+ def VMN : BinaryVRRcGeneric<"vmn", 0xE7FE>;
+ def VMNB : BinaryVRRc<"vmnb", 0xE7FE, null_frag, v128b, v128b, 0>;
+ def VMNH : BinaryVRRc<"vmnh", 0xE7FE, null_frag, v128h, v128h, 1>;
+ def VMNF : BinaryVRRc<"vmnf", 0xE7FE, null_frag, v128f, v128f, 2>;
+ def VMNG : BinaryVRRc<"vmng", 0xE7FE, null_frag, v128g, v128g, 3>;
+
+ // Minimum logical.
+ def VMNL : BinaryVRRcGeneric<"vmnl", 0xE7FC>;
+ def VMNLB : BinaryVRRc<"vmnlb", 0xE7FC, null_frag, v128b, v128b, 0>;
+ def VMNLH : BinaryVRRc<"vmnlh", 0xE7FC, null_frag, v128h, v128h, 1>;
+ def VMNLF : BinaryVRRc<"vmnlf", 0xE7FC, null_frag, v128f, v128f, 2>;
+ def VMNLG : BinaryVRRc<"vmnlg", 0xE7FC, null_frag, v128g, v128g, 3>;
+
+ // Multiply and add low.
+ def VMAL : TernaryVRRdGeneric<"vmal", 0xE7AA>;
+ def VMALB : TernaryVRRd<"vmalb", 0xE7AA, z_muladd, v128b, v128b, 0>;
+ def VMALHW : TernaryVRRd<"vmalhw", 0xE7AA, z_muladd, v128h, v128h, 1>;
+ def VMALF : TernaryVRRd<"vmalf", 0xE7AA, z_muladd, v128f, v128f, 2>;
+
+ // Multiply and add high.
+ def VMAH : TernaryVRRdGeneric<"vmah", 0xE7AB>;
+ def VMAHB : TernaryVRRd<"vmahb", 0xE7AB, int_s390_vmahb, v128b, v128b, 0>;
+ def VMAHH : TernaryVRRd<"vmahh", 0xE7AB, int_s390_vmahh, v128h, v128h, 1>;
+ def VMAHF : TernaryVRRd<"vmahf", 0xE7AB, int_s390_vmahf, v128f, v128f, 2>;
+
+ // Multiply and add logical high.
+ def VMALH : TernaryVRRdGeneric<"vmalh", 0xE7A9>;
+ def VMALHB : TernaryVRRd<"vmalhb", 0xE7A9, int_s390_vmalhb, v128b, v128b, 0>;
+ def VMALHH : TernaryVRRd<"vmalhh", 0xE7A9, int_s390_vmalhh, v128h, v128h, 1>;
+ def VMALHF : TernaryVRRd<"vmalhf", 0xE7A9, int_s390_vmalhf, v128f, v128f, 2>;
+
+ // Multiply and add even.
+ def VMAE : TernaryVRRdGeneric<"vmae", 0xE7AE>;
+ def VMAEB : TernaryVRRd<"vmaeb", 0xE7AE, int_s390_vmaeb, v128h, v128b, 0>;
+ def VMAEH : TernaryVRRd<"vmaeh", 0xE7AE, int_s390_vmaeh, v128f, v128h, 1>;
+ def VMAEF : TernaryVRRd<"vmaef", 0xE7AE, int_s390_vmaef, v128g, v128f, 2>;
+
+ // Multiply and add logical even.
+ def VMALE : TernaryVRRdGeneric<"vmale", 0xE7AC>;
+ def VMALEB : TernaryVRRd<"vmaleb", 0xE7AC, int_s390_vmaleb, v128h, v128b, 0>;
+ def VMALEH : TernaryVRRd<"vmaleh", 0xE7AC, int_s390_vmaleh, v128f, v128h, 1>;
+ def VMALEF : TernaryVRRd<"vmalef", 0xE7AC, int_s390_vmalef, v128g, v128f, 2>;
+
+ // Multiply and add odd.
+ def VMAO : TernaryVRRdGeneric<"vmao", 0xE7AF>;
+ def VMAOB : TernaryVRRd<"vmaob", 0xE7AF, int_s390_vmaob, v128h, v128b, 0>;
+ def VMAOH : TernaryVRRd<"vmaoh", 0xE7AF, int_s390_vmaoh, v128f, v128h, 1>;
+ def VMAOF : TernaryVRRd<"vmaof", 0xE7AF, int_s390_vmaof, v128g, v128f, 2>;
+
+ // Multiply and add logical odd.
+ def VMALO : TernaryVRRdGeneric<"vmalo", 0xE7AD>;
+ def VMALOB : TernaryVRRd<"vmalob", 0xE7AD, int_s390_vmalob, v128h, v128b, 0>;
+ def VMALOH : TernaryVRRd<"vmaloh", 0xE7AD, int_s390_vmaloh, v128f, v128h, 1>;
+ def VMALOF : TernaryVRRd<"vmalof", 0xE7AD, int_s390_vmalof, v128g, v128f, 2>;
+
+ // Multiply high.
+ def VMH : BinaryVRRcGeneric<"vmh", 0xE7A3>;
+ def VMHB : BinaryVRRc<"vmhb", 0xE7A3, int_s390_vmhb, v128b, v128b, 0>;
+ def VMHH : BinaryVRRc<"vmhh", 0xE7A3, int_s390_vmhh, v128h, v128h, 1>;
+ def VMHF : BinaryVRRc<"vmhf", 0xE7A3, int_s390_vmhf, v128f, v128f, 2>;
+
+ // Multiply logical high.
+ def VMLH : BinaryVRRcGeneric<"vmlh", 0xE7A1>;
+ def VMLHB : BinaryVRRc<"vmlhb", 0xE7A1, int_s390_vmlhb, v128b, v128b, 0>;
+ def VMLHH : BinaryVRRc<"vmlhh", 0xE7A1, int_s390_vmlhh, v128h, v128h, 1>;
+ def VMLHF : BinaryVRRc<"vmlhf", 0xE7A1, int_s390_vmlhf, v128f, v128f, 2>;
+
+ // Multiply low.
+ def VML : BinaryVRRcGeneric<"vml", 0xE7A2>;
+ def VMLB : BinaryVRRc<"vmlb", 0xE7A2, mul, v128b, v128b, 0>;
+ def VMLHW : BinaryVRRc<"vmlhw", 0xE7A2, mul, v128h, v128h, 1>;
+ def VMLF : BinaryVRRc<"vmlf", 0xE7A2, mul, v128f, v128f, 2>;
+
+ // Multiply even.
+ def VME : BinaryVRRcGeneric<"vme", 0xE7A6>;
+ def VMEB : BinaryVRRc<"vmeb", 0xE7A6, int_s390_vmeb, v128h, v128b, 0>;
+ def VMEH : BinaryVRRc<"vmeh", 0xE7A6, int_s390_vmeh, v128f, v128h, 1>;
+ def VMEF : BinaryVRRc<"vmef", 0xE7A6, int_s390_vmef, v128g, v128f, 2>;
+
+ // Multiply logical even.
+ def VMLE : BinaryVRRcGeneric<"vmle", 0xE7A4>;
+ def VMLEB : BinaryVRRc<"vmleb", 0xE7A4, int_s390_vmleb, v128h, v128b, 0>;
+ def VMLEH : BinaryVRRc<"vmleh", 0xE7A4, int_s390_vmleh, v128f, v128h, 1>;
+ def VMLEF : BinaryVRRc<"vmlef", 0xE7A4, int_s390_vmlef, v128g, v128f, 2>;
+
+ // Multiply odd.
+ def VMO : BinaryVRRcGeneric<"vmo", 0xE7A7>;
+ def VMOB : BinaryVRRc<"vmob", 0xE7A7, int_s390_vmob, v128h, v128b, 0>;
+ def VMOH : BinaryVRRc<"vmoh", 0xE7A7, int_s390_vmoh, v128f, v128h, 1>;
+ def VMOF : BinaryVRRc<"vmof", 0xE7A7, int_s390_vmof, v128g, v128f, 2>;
+
+ // Multiply logical odd.
+ def VMLO : BinaryVRRcGeneric<"vmlo", 0xE7A5>;
+ def VMLOB : BinaryVRRc<"vmlob", 0xE7A5, int_s390_vmlob, v128h, v128b, 0>;
+ def VMLOH : BinaryVRRc<"vmloh", 0xE7A5, int_s390_vmloh, v128f, v128h, 1>;
+ def VMLOF : BinaryVRRc<"vmlof", 0xE7A5, int_s390_vmlof, v128g, v128f, 2>;
+
+ // Nor.
+ def VNO : BinaryVRRc<"vno", 0xE76B, null_frag, v128any, v128any>;
+ def : InstAlias<"vnot\t$V1, $V2", (VNO VR128:$V1, VR128:$V2, VR128:$V2), 0>;
+
+ // Or.
+ def VO : BinaryVRRc<"vo", 0xE76A, null_frag, v128any, v128any>;
+
+ // Population count.
+ def VPOPCT : UnaryVRRaGeneric<"vpopct", 0xE750>;
+ def : Pat<(v16i8 (z_popcnt VR128:$x)), (VPOPCT VR128:$x, 0)>;
+
+ // Element rotate left logical (with vector shift amount).
+ def VERLLV : BinaryVRRcGeneric<"verllv", 0xE773>;
+ def VERLLVB : BinaryVRRc<"verllvb", 0xE773, int_s390_verllvb,
+ v128b, v128b, 0>;
+ def VERLLVH : BinaryVRRc<"verllvh", 0xE773, int_s390_verllvh,
+ v128h, v128h, 1>;
+ def VERLLVF : BinaryVRRc<"verllvf", 0xE773, int_s390_verllvf,
+ v128f, v128f, 2>;
+ def VERLLVG : BinaryVRRc<"verllvg", 0xE773, int_s390_verllvg,
+ v128g, v128g, 3>;
+
+ // Element rotate left logical (with scalar shift amount).
+ def VERLL : BinaryVRSaGeneric<"verll", 0xE733>;
+ def VERLLB : BinaryVRSa<"verllb", 0xE733, int_s390_verllb, v128b, v128b, 0>;
+ def VERLLH : BinaryVRSa<"verllh", 0xE733, int_s390_verllh, v128h, v128h, 1>;
+ def VERLLF : BinaryVRSa<"verllf", 0xE733, int_s390_verllf, v128f, v128f, 2>;
+ def VERLLG : BinaryVRSa<"verllg", 0xE733, int_s390_verllg, v128g, v128g, 3>;
+
+ // Element rotate and insert under mask.
+ def VERIM : QuaternaryVRIdGeneric<"verim", 0xE772>;
+ def VERIMB : QuaternaryVRId<"verimb", 0xE772, int_s390_verimb,
+ v128b, v128b, 0>;
+ def VERIMH : QuaternaryVRId<"verimh", 0xE772, int_s390_verimh,
+ v128h, v128h, 1>;
+ def VERIMF : QuaternaryVRId<"verimf", 0xE772, int_s390_verimf,
+ v128f, v128f, 2>;
+ def VERIMG : QuaternaryVRId<"verimg", 0xE772, int_s390_verimg,
+ v128g, v128g, 3>;
+
+ // Element shift left (with vector shift amount).
+ def VESLV : BinaryVRRcGeneric<"veslv", 0xE770>;
+ def VESLVB : BinaryVRRc<"veslvb", 0xE770, z_vshl, v128b, v128b, 0>;
+ def VESLVH : BinaryVRRc<"veslvh", 0xE770, z_vshl, v128h, v128h, 1>;
+ def VESLVF : BinaryVRRc<"veslvf", 0xE770, z_vshl, v128f, v128f, 2>;
+ def VESLVG : BinaryVRRc<"veslvg", 0xE770, z_vshl, v128g, v128g, 3>;
+
+ // Element shift left (with scalar shift amount).
+ def VESL : BinaryVRSaGeneric<"vesl", 0xE730>;
+ def VESLB : BinaryVRSa<"veslb", 0xE730, z_vshl_by_scalar, v128b, v128b, 0>;
+ def VESLH : BinaryVRSa<"veslh", 0xE730, z_vshl_by_scalar, v128h, v128h, 1>;
+ def VESLF : BinaryVRSa<"veslf", 0xE730, z_vshl_by_scalar, v128f, v128f, 2>;
+ def VESLG : BinaryVRSa<"veslg", 0xE730, z_vshl_by_scalar, v128g, v128g, 3>;
+
+ // Element shift right arithmetic (with vector shift amount).
+ def VESRAV : BinaryVRRcGeneric<"vesrav", 0xE77A>;
+ def VESRAVB : BinaryVRRc<"vesravb", 0xE77A, z_vsra, v128b, v128b, 0>;
+ def VESRAVH : BinaryVRRc<"vesravh", 0xE77A, z_vsra, v128h, v128h, 1>;
+ def VESRAVF : BinaryVRRc<"vesravf", 0xE77A, z_vsra, v128f, v128f, 2>;
+ def VESRAVG : BinaryVRRc<"vesravg", 0xE77A, z_vsra, v128g, v128g, 3>;
+
+ // Element shift right arithmetic (with scalar shift amount).
+ def VESRA : BinaryVRSaGeneric<"vesra", 0xE73A>;
+ def VESRAB : BinaryVRSa<"vesrab", 0xE73A, z_vsra_by_scalar, v128b, v128b, 0>;
+ def VESRAH : BinaryVRSa<"vesrah", 0xE73A, z_vsra_by_scalar, v128h, v128h, 1>;
+ def VESRAF : BinaryVRSa<"vesraf", 0xE73A, z_vsra_by_scalar, v128f, v128f, 2>;
+ def VESRAG : BinaryVRSa<"vesrag", 0xE73A, z_vsra_by_scalar, v128g, v128g, 3>;
+
+ // Element shift right logical (with vector shift amount).
+ def VESRLV : BinaryVRRcGeneric<"vesrlv", 0xE778>;
+ def VESRLVB : BinaryVRRc<"vesrlvb", 0xE778, z_vsrl, v128b, v128b, 0>;
+ def VESRLVH : BinaryVRRc<"vesrlvh", 0xE778, z_vsrl, v128h, v128h, 1>;
+ def VESRLVF : BinaryVRRc<"vesrlvf", 0xE778, z_vsrl, v128f, v128f, 2>;
+ def VESRLVG : BinaryVRRc<"vesrlvg", 0xE778, z_vsrl, v128g, v128g, 3>;
+
+ // Element shift right logical (with scalar shift amount).
+ def VESRL : BinaryVRSaGeneric<"vesrl", 0xE738>;
+ def VESRLB : BinaryVRSa<"vesrlb", 0xE738, z_vsrl_by_scalar, v128b, v128b, 0>;
+ def VESRLH : BinaryVRSa<"vesrlh", 0xE738, z_vsrl_by_scalar, v128h, v128h, 1>;
+ def VESRLF : BinaryVRSa<"vesrlf", 0xE738, z_vsrl_by_scalar, v128f, v128f, 2>;
+ def VESRLG : BinaryVRSa<"vesrlg", 0xE738, z_vsrl_by_scalar, v128g, v128g, 3>;
+
+ // Shift left.
+ def VSL : BinaryVRRc<"vsl", 0xE774, int_s390_vsl, v128b, v128b>;
+
+ // Shift left by byte.
+ def VSLB : BinaryVRRc<"vslb", 0xE775, int_s390_vslb, v128b, v128b>;
+
+ // Shift left double by byte.
+ def VSLDB : TernaryVRId<"vsldb", 0xE777, z_shl_double, v128b, v128b, 0>;
+ def : Pat<(int_s390_vsldb VR128:$x, VR128:$y, imm32zx8:$z),
+ (VSLDB VR128:$x, VR128:$y, imm32zx8:$z)>;
+
+ // Shift right arithmetic.
+ def VSRA : BinaryVRRc<"vsra", 0xE77E, int_s390_vsra, v128b, v128b>;
+
+ // Shift right arithmetic by byte.
+ def VSRAB : BinaryVRRc<"vsrab", 0xE77F, int_s390_vsrab, v128b, v128b>;
+
+ // Shift right logical.
+ def VSRL : BinaryVRRc<"vsrl", 0xE77C, int_s390_vsrl, v128b, v128b>;
+
+ // Shift right logical by byte.
+ def VSRLB : BinaryVRRc<"vsrlb", 0xE77D, int_s390_vsrlb, v128b, v128b>;
+
+ // Subtract.
+ def VS : BinaryVRRcGeneric<"vs", 0xE7F7>;
+ def VSB : BinaryVRRc<"vsb", 0xE7F7, sub, v128b, v128b, 0>;
+ def VSH : BinaryVRRc<"vsh", 0xE7F7, sub, v128h, v128h, 1>;
+ def VSF : BinaryVRRc<"vsf", 0xE7F7, sub, v128f, v128f, 2>;
+ def VSG : BinaryVRRc<"vsg", 0xE7F7, sub, v128g, v128g, 3>;
+ def VSQ : BinaryVRRc<"vsq", 0xE7F7, int_s390_vsq, v128q, v128q, 4>;
+
+ // Subtract compute borrow indication.
+ def VSCBI : BinaryVRRcGeneric<"vscbi", 0xE7F5>;
+ def VSCBIB : BinaryVRRc<"vscbib", 0xE7F5, int_s390_vscbib, v128b, v128b, 0>;
+ def VSCBIH : BinaryVRRc<"vscbih", 0xE7F5, int_s390_vscbih, v128h, v128h, 1>;
+ def VSCBIF : BinaryVRRc<"vscbif", 0xE7F5, int_s390_vscbif, v128f, v128f, 2>;
+ def VSCBIG : BinaryVRRc<"vscbig", 0xE7F5, int_s390_vscbig, v128g, v128g, 3>;
+ def VSCBIQ : BinaryVRRc<"vscbiq", 0xE7F5, int_s390_vscbiq, v128q, v128q, 4>;
+
+ // Subtract with borrow indication.
+ def VSBI : TernaryVRRdGeneric<"vsbi", 0xE7BF>;
+ def VSBIQ : TernaryVRRd<"vsbiq", 0xE7BF, int_s390_vsbiq, v128q, v128q, 4>;
+
+ // Subtract with borrow compute borrow indication.
+ def VSBCBI : TernaryVRRdGeneric<"vsbcbi", 0xE7BD>;
+ def VSBCBIQ : TernaryVRRd<"vsbcbiq", 0xE7BD, int_s390_vsbcbiq,
+ v128q, v128q, 4>;
+
+ // Sum across doubleword.
+ def VSUMG : BinaryVRRcGeneric<"vsumg", 0xE765>;
+ def VSUMGH : BinaryVRRc<"vsumgh", 0xE765, z_vsum, v128g, v128h, 1>;
+ def VSUMGF : BinaryVRRc<"vsumgf", 0xE765, z_vsum, v128g, v128f, 2>;
+
+ // Sum across quadword.
+ def VSUMQ : BinaryVRRcGeneric<"vsumq", 0xE767>;
+ def VSUMQF : BinaryVRRc<"vsumqf", 0xE767, z_vsum, v128q, v128f, 2>;
+ def VSUMQG : BinaryVRRc<"vsumqg", 0xE767, z_vsum, v128q, v128g, 3>;
+
+ // Sum across word.
+ def VSUM : BinaryVRRcGeneric<"vsum", 0xE764>;
+ def VSUMB : BinaryVRRc<"vsumb", 0xE764, z_vsum, v128f, v128b, 0>;
+ def VSUMH : BinaryVRRc<"vsumh", 0xE764, z_vsum, v128f, v128h, 1>;
+}
+
+// Instantiate the bitwise ops for type TYPE.
+multiclass BitwiseVectorOps<ValueType type> {
+ let Predicates = [FeatureVector] in {
+ def : Pat<(type (and VR128:$x, VR128:$y)), (VN VR128:$x, VR128:$y)>;
+ def : Pat<(type (and VR128:$x, (z_vnot VR128:$y))),
+ (VNC VR128:$x, VR128:$y)>;
+ def : Pat<(type (or VR128:$x, VR128:$y)), (VO VR128:$x, VR128:$y)>;
+ def : Pat<(type (xor VR128:$x, VR128:$y)), (VX VR128:$x, VR128:$y)>;
+ def : Pat<(type (or (and VR128:$x, VR128:$z),
+ (and VR128:$y, (z_vnot VR128:$z)))),
+ (VSEL VR128:$x, VR128:$y, VR128:$z)>;
+ def : Pat<(type (z_vnot (or VR128:$x, VR128:$y))),
+ (VNO VR128:$x, VR128:$y)>;
+ def : Pat<(type (z_vnot VR128:$x)), (VNO VR128:$x, VR128:$x)>;
+ }
+}
+
+defm : BitwiseVectorOps<v16i8>;
+defm : BitwiseVectorOps<v8i16>;
+defm : BitwiseVectorOps<v4i32>;
+defm : BitwiseVectorOps<v2i64>;
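+
+// e.g. (sketch) a v4i32 NOT, (xor x, -1), matches the z_vnot pattern above
+// and emits "vno %v24, %v26, %v26"; the assembler also accepts the
+// two-operand "vnot" spelling defined earlier for the same encoding.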
+
+// Instantiate additional patterns for absolute-related expressions on
+// type TYPE. LC is the negate instruction for TYPE and LP is the absolute
+// instruction.
+multiclass IntegerAbsoluteVectorOps<ValueType type, Instruction lc,
+ Instruction lp, int shift> {
+ let Predicates = [FeatureVector] in {
+ def : Pat<(type (vselect (type (z_vicmph_zero VR128:$x)),
+ (z_vneg VR128:$x), VR128:$x)),
+ (lc (lp VR128:$x))>;
+ def : Pat<(type (vselect (type (z_vnot (z_vicmph_zero VR128:$x))),
+ VR128:$x, (z_vneg VR128:$x))),
+ (lc (lp VR128:$x))>;
+ def : Pat<(type (vselect (type (z_vicmpl_zero VR128:$x)),
+ VR128:$x, (z_vneg VR128:$x))),
+ (lc (lp VR128:$x))>;
+ def : Pat<(type (vselect (type (z_vnot (z_vicmpl_zero VR128:$x))),
+ (z_vneg VR128:$x), VR128:$x)),
+ (lc (lp VR128:$x))>;
+ def : Pat<(type (or (and (z_vsra_by_scalar VR128:$x, (i32 shift)),
+ (z_vneg VR128:$x)),
+ (and (z_vnot (z_vsra_by_scalar VR128:$x, (i32 shift))),
+ VR128:$x))),
+ (lp VR128:$x)>;
+ def : Pat<(type (or (and (z_vsra_by_scalar VR128:$x, (i32 shift)),
+ VR128:$x),
+ (and (z_vnot (z_vsra_by_scalar VR128:$x, (i32 shift))),
+ (z_vneg VR128:$x)))),
+ (lc (lp VR128:$x))>;
+ }
+}
+
+defm : IntegerAbsoluteVectorOps<v16i8, VLCB, VLPB, 7>;
+defm : IntegerAbsoluteVectorOps<v8i16, VLCH, VLPH, 15>;
+defm : IntegerAbsoluteVectorOps<v4i32, VLCF, VLPF, 31>;
+defm : IntegerAbsoluteVectorOps<v2i64, VLCG, VLPG, 63>;
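+
+// For example (sketch): for v4i32 the familiar mask-based abs idiom
+//   t = x >>s 31;  abs = (t & -x) | (~t & x)
+// matches the shift-based patterns above and collapses to a single "vlpf",
+// while the negated variant becomes "vlcf" of that "vlpf".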
+
+// Instantiate minimum- and maximum-related patterns for TYPE. CMPH is the
+// signed or unsigned "set if greater than" comparison instruction and
+// MIN and MAX are the associated minimum and maximum instructions.
+multiclass IntegerMinMaxVectorOps<ValueType type, SDPatternOperator cmph,
+ Instruction min, Instruction max> {
+ let Predicates = [FeatureVector] in {
+ def : Pat<(type (vselect (cmph VR128:$x, VR128:$y), VR128:$x, VR128:$y)),
+ (max VR128:$x, VR128:$y)>;
+ def : Pat<(type (vselect (cmph VR128:$x, VR128:$y), VR128:$y, VR128:$x)),
+ (min VR128:$x, VR128:$y)>;
+ def : Pat<(type (vselect (z_vnot (cmph VR128:$x, VR128:$y)),
+ VR128:$x, VR128:$y)),
+ (min VR128:$x, VR128:$y)>;
+ def : Pat<(type (vselect (z_vnot (cmph VR128:$x, VR128:$y)),
+ VR128:$y, VR128:$x)),
+ (max VR128:$x, VR128:$y)>;
+ }
+}
+
+// Signed min/max.
+defm : IntegerMinMaxVectorOps<v16i8, z_vicmph, VMNB, VMXB>;
+defm : IntegerMinMaxVectorOps<v8i16, z_vicmph, VMNH, VMXH>;
+defm : IntegerMinMaxVectorOps<v4i32, z_vicmph, VMNF, VMXF>;
+defm : IntegerMinMaxVectorOps<v2i64, z_vicmph, VMNG, VMXG>;
+
+// Unsigned min/max.
+defm : IntegerMinMaxVectorOps<v16i8, z_vicmphl, VMNLB, VMXLB>;
+defm : IntegerMinMaxVectorOps<v8i16, z_vicmphl, VMNLH, VMXLH>;
+defm : IntegerMinMaxVectorOps<v4i32, z_vicmphl, VMNLF, VMXLF>;
+defm : IntegerMinMaxVectorOps<v2i64, z_vicmphl, VMNLG, VMXLG>;
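+
+// e.g. (sketch) a signed v4i32 maximum written as
+//   (vselect (z_vicmph x, y), x, y)
+// matches the first pattern and emits "vmxf %v24, %v26, %v28"; selecting
+// the operands the other way round yields "vmnf" instead.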
+
+//===----------------------------------------------------------------------===//
+// Integer comparison
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureVector] in {
+ // Element compare.
+ let Defs = [CC] in {
+ def VEC : CompareVRRaGeneric<"vec", 0xE7DB>;
+ def VECB : CompareVRRa<"vecb", 0xE7DB, null_frag, v128b, 0>;
+ def VECH : CompareVRRa<"vech", 0xE7DB, null_frag, v128h, 1>;
+ def VECF : CompareVRRa<"vecf", 0xE7DB, null_frag, v128f, 2>;
+ def VECG : CompareVRRa<"vecg", 0xE7DB, null_frag, v128g, 3>;
+ }
+
+ // Element compare logical.
+ let Defs = [CC] in {
+ def VECL : CompareVRRaGeneric<"vecl", 0xE7D9>;
+ def VECLB : CompareVRRa<"veclb", 0xE7D9, null_frag, v128b, 0>;
+ def VECLH : CompareVRRa<"veclh", 0xE7D9, null_frag, v128h, 1>;
+ def VECLF : CompareVRRa<"veclf", 0xE7D9, null_frag, v128f, 2>;
+ def VECLG : CompareVRRa<"veclg", 0xE7D9, null_frag, v128g, 3>;
+ }
+
+ // Compare equal.
+ def VCEQ : BinaryVRRbSPairGeneric<"vceq", 0xE7F8>;
+ defm VCEQB : BinaryVRRbSPair<"vceqb", 0xE7F8, z_vicmpe, z_vicmpes,
+ v128b, v128b, 0>;
+ defm VCEQH : BinaryVRRbSPair<"vceqh", 0xE7F8, z_vicmpe, z_vicmpes,
+ v128h, v128h, 1>;
+ defm VCEQF : BinaryVRRbSPair<"vceqf", 0xE7F8, z_vicmpe, z_vicmpes,
+ v128f, v128f, 2>;
+ defm VCEQG : BinaryVRRbSPair<"vceqg", 0xE7F8, z_vicmpe, z_vicmpes,
+ v128g, v128g, 3>;
+
+ // Compare high.
+ def VCH : BinaryVRRbSPairGeneric<"vch", 0xE7FB>;
+ defm VCHB : BinaryVRRbSPair<"vchb", 0xE7FB, z_vicmph, z_vicmphs,
+ v128b, v128b, 0>;
+ defm VCHH : BinaryVRRbSPair<"vchh", 0xE7FB, z_vicmph, z_vicmphs,
+ v128h, v128h, 1>;
+ defm VCHF : BinaryVRRbSPair<"vchf", 0xE7FB, z_vicmph, z_vicmphs,
+ v128f, v128f, 2>;
+ defm VCHG : BinaryVRRbSPair<"vchg", 0xE7FB, z_vicmph, z_vicmphs,
+ v128g, v128g, 3>;
+
+ // Compare high logical.
+ def VCHL : BinaryVRRbSPairGeneric<"vchl", 0xE7F9>;
+ defm VCHLB : BinaryVRRbSPair<"vchlb", 0xE7F9, z_vicmphl, z_vicmphls,
+ v128b, v128b, 0>;
+ defm VCHLH : BinaryVRRbSPair<"vchlh", 0xE7F9, z_vicmphl, z_vicmphls,
+ v128h, v128h, 1>;
+ defm VCHLF : BinaryVRRbSPair<"vchlf", 0xE7F9, z_vicmphl, z_vicmphls,
+ v128f, v128f, 2>;
+ defm VCHLG : BinaryVRRbSPair<"vchlg", 0xE7F9, z_vicmphl, z_vicmphls,
+ v128g, v128g, 3>;
+
+ // Test under mask.
+ let Defs = [CC] in
+ def VTM : CompareVRRa<"vtm", 0xE7D8, z_vtm, v128b, 0>;
+}
+
+//===----------------------------------------------------------------------===//
+// Floating-point arithmetic
+//===----------------------------------------------------------------------===//
+
+// See comments in SystemZInstrFP.td for the suppression flags and
+// rounding modes.
+multiclass VectorRounding<Instruction insn, TypedReg tr> {
+ def : FPConversion<insn, frint, tr, tr, 0, 0>;
+ def : FPConversion<insn, fnearbyint, tr, tr, 4, 0>;
+ def : FPConversion<insn, ffloor, tr, tr, 4, 7>;
+ def : FPConversion<insn, fceil, tr, tr, 4, 6>;
+ def : FPConversion<insn, ftrunc, tr, tr, 4, 5>;
+ def : FPConversion<insn, fround, tr, tr, 4, 1>;
+}
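+
+// As a worked example (sketch): ffloor uses suppression flag 4 (inexact
+// suppressed) and rounding mode 7 (toward minus infinity), so the v2f64
+// instantiation below emits "vfidb %v24, %v26, 4, 7".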
+
+let Predicates = [FeatureVector] in {
+ // Add.
+ def VFA : BinaryVRRcFloatGeneric<"vfa", 0xE7E3>;
+ def VFADB : BinaryVRRc<"vfadb", 0xE7E3, fadd, v128db, v128db, 3, 0>;
+ def WFADB : BinaryVRRc<"wfadb", 0xE7E3, fadd, v64db, v64db, 3, 8>;
+
+ // Convert from fixed 64-bit.
+ def VCDG : TernaryVRRaFloatGeneric<"vcdg", 0xE7C3>;
+ def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>;
+ def WCDGB : TernaryVRRa<"wcdgb", 0xE7C3, null_frag, v64db, v64g, 3, 8>;
+ def : FPConversion<VCDGB, sint_to_fp, v128db, v128g, 0, 0>;
+
+ // Convert from logical 64-bit.
+ def VCDLG : TernaryVRRaFloatGeneric<"vcdlg", 0xE7C1>;
+ def VCDLGB : TernaryVRRa<"vcdlgb", 0xE7C1, null_frag, v128db, v128g, 3, 0>;
+ def WCDLGB : TernaryVRRa<"wcdlgb", 0xE7C1, null_frag, v64db, v64g, 3, 8>;
+ def : FPConversion<VCDLGB, uint_to_fp, v128db, v128g, 0, 0>;
+
+ // Convert to fixed 64-bit.
+ def VCGD : TernaryVRRaFloatGeneric<"vcgd", 0xE7C2>;
+ def VCGDB : TernaryVRRa<"vcgdb", 0xE7C2, null_frag, v128g, v128db, 3, 0>;
+ def WCGDB : TernaryVRRa<"wcgdb", 0xE7C2, null_frag, v64g, v64db, 3, 8>;
+ // Rounding mode should agree with SystemZInstrFP.td.
+ def : FPConversion<VCGDB, fp_to_sint, v128g, v128db, 0, 5>;
+
+ // Convert to logical 64-bit.
+ def VCLGD : TernaryVRRaFloatGeneric<"vclgd", 0xE7C0>;
+ def VCLGDB : TernaryVRRa<"vclgdb", 0xE7C0, null_frag, v128g, v128db, 3, 0>;
+ def WCLGDB : TernaryVRRa<"wclgdb", 0xE7C0, null_frag, v64g, v64db, 3, 8>;
+ // Rounding mode should agree with SystemZInstrFP.td.
+ def : FPConversion<VCLGDB, fp_to_uint, v128g, v128db, 0, 5>;
+
+ // Divide.
+ def VFD : BinaryVRRcFloatGeneric<"vfd", 0xE7E5>;
+ def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, fdiv, v128db, v128db, 3, 0>;
+ def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, fdiv, v64db, v64db, 3, 8>;
+
+ // Load FP integer.
+ def VFI : TernaryVRRaFloatGeneric<"vfi", 0xE7C7>;
+ def VFIDB : TernaryVRRa<"vfidb", 0xE7C7, int_s390_vfidb, v128db, v128db, 3, 0>;
+ def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>;
+ defm : VectorRounding<VFIDB, v128db>;
+ defm : VectorRounding<WFIDB, v64db>;
+
+ // Load lengthened.
+ def VLDE : UnaryVRRaFloatGeneric<"vlde", 0xE7C4>;
+ def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128eb, 2, 0>;
+ def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, fpextend, v64db, v32eb, 2, 8>;
+
+ // Load rounded.
+ def VLED : TernaryVRRaFloatGeneric<"vled", 0xE7C5>;
+ def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128eb, v128db, 3, 0>;
+ def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32eb, v64db, 3, 8>;
+ def : Pat<(v4f32 (z_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>;
+ def : FPConversion<WLEDB, fpround, v32eb, v64db, 0, 0>;
+
+ // Multiply.
+ def VFM : BinaryVRRcFloatGeneric<"vfm", 0xE7E7>;
+ def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, fmul, v128db, v128db, 3, 0>;
+ def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, fmul, v64db, v64db, 3, 8>;
+
+ // Multiply and add.
+ def VFMA : TernaryVRReFloatGeneric<"vfma", 0xE78F>;
+ def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, fma, v128db, v128db, 0, 3>;
+ def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, fma, v64db, v64db, 8, 3>;
+
+ // Multiply and subtract.
+ def VFMS : TernaryVRReFloatGeneric<"vfms", 0xE78E>;
+ def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, fms, v128db, v128db, 0, 3>;
+ def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, fms, v64db, v64db, 8, 3>;
+
+ // Perform sign operation.
+ def VFPSO : BinaryVRRaFloatGeneric<"vfpso", 0xE7CC>;
+ def VFPSODB : BinaryVRRa<"vfpsodb", 0xE7CC, null_frag, v128db, v128db, 3, 0>;
+ def WFPSODB : BinaryVRRa<"wfpsodb", 0xE7CC, null_frag, v64db, v64db, 3, 8>;
+
+ // Load complement.
+ def VFLCDB : UnaryVRRa<"vflcdb", 0xE7CC, fneg, v128db, v128db, 3, 0, 0>;
+ def WFLCDB : UnaryVRRa<"wflcdb", 0xE7CC, fneg, v64db, v64db, 3, 8, 0>;
+
+ // Load negative.
+ def VFLNDB : UnaryVRRa<"vflndb", 0xE7CC, fnabs, v128db, v128db, 3, 0, 1>;
+ def WFLNDB : UnaryVRRa<"wflndb", 0xE7CC, fnabs, v64db, v64db, 3, 8, 1>;
+
+ // Load positive.
+ def VFLPDB : UnaryVRRa<"vflpdb", 0xE7CC, fabs, v128db, v128db, 3, 0, 2>;
+ def WFLPDB : UnaryVRRa<"wflpdb", 0xE7CC, fabs, v64db, v64db, 3, 8, 2>;
+
+ // Square root.
+ def VFSQ : UnaryVRRaFloatGeneric<"vfsq", 0xE7CE>;
+ def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, fsqrt, v128db, v128db, 3, 0>;
+ def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, fsqrt, v64db, v64db, 3, 8>;
+
+ // Subtract.
+ def VFS : BinaryVRRcFloatGeneric<"vfs", 0xE7E2>;
+ def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, fsub, v128db, v128db, 3, 0>;
+ def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, fsub, v64db, v64db, 3, 8>;
+
+ // Test data class immediate.
+ let Defs = [CC] in {
+ def VFTCI : BinaryVRIeFloatGeneric<"vftci", 0xE74A>;
+ def VFTCIDB : BinaryVRIe<"vftcidb", 0xE74A, z_vftci, v128g, v128db, 3, 0>;
+ def WFTCIDB : BinaryVRIe<"wftcidb", 0xE74A, null_frag, v64g, v64db, 3, 8>;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Floating-point comparison
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureVector] in {
+ // Compare scalar.
+ let Defs = [CC] in {
+ def WFC : CompareVRRaFloatGeneric<"wfc", 0xE7CB>;
+ def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_fcmp, v64db, 3>;
+ }
+
+ // Compare and signal scalar.
+ let Defs = [CC] in {
+ def WFK : CompareVRRaFloatGeneric<"wfk", 0xE7CA>;
+ def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, null_frag, v64db, 3>;
+ }
+
+ // Compare equal.
+ def VFCE : BinaryVRRcSPairFloatGeneric<"vfce", 0xE7E8>;
+ defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_vfcmpe, z_vfcmpes,
+ v128g, v128db, 3, 0>;
+ defm WFCEDB : BinaryVRRcSPair<"wfcedb", 0xE7E8, null_frag, null_frag,
+ v64g, v64db, 3, 8>;
+
+ // Compare high.
+ def VFCH : BinaryVRRcSPairFloatGeneric<"vfch", 0xE7EB>;
+ defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_vfcmph, z_vfcmphs,
+ v128g, v128db, 3, 0>;
+ defm WFCHDB : BinaryVRRcSPair<"wfchdb", 0xE7EB, null_frag, null_frag,
+ v64g, v64db, 3, 8>;
+
+ // Compare high or equal.
+ def VFCHE : BinaryVRRcSPairFloatGeneric<"vfche", 0xE7EA>;
+ defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_vfcmphe, z_vfcmphes,
+ v128g, v128db, 3, 0>;
+ defm WFCHEDB : BinaryVRRcSPair<"wfchedb", 0xE7EA, null_frag, null_frag,
+ v64g, v64db, 3, 8>;
+}
+
+//===----------------------------------------------------------------------===//
+// Conversions
+//===----------------------------------------------------------------------===//
+
+def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
+def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
+def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
+def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;
+def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
+
+def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
+def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
+def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
+def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;
+def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
+
+def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
+def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
+def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
+def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
+def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
+
+def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
+def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
+def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
+def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;
+def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
+
+def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
+def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
+def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
+def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
+def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;
+
+def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
+def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
+def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
+def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
+def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
+
+//===----------------------------------------------------------------------===//
+// Replicating scalars
+//===----------------------------------------------------------------------===//
+
+// Define patterns for replicating a scalar GR32 into a vector of type TYPE.
+// INDEX is the element index holding the scalar's low bits: 8 divided by the
+// element size in bytes, minus 1 (hence 7 for bytes, 3 for halfwords and
+// 1 for words).
+class VectorReplicateScalar<ValueType type, Instruction insn, bits<16> index>
+ : Pat<(type (z_replicate GR32:$scalar)),
+ (insn (VLVGP32 GR32:$scalar, GR32:$scalar), index)>;
+
+def : VectorReplicateScalar<v16i8, VREPB, 7>;
+def : VectorReplicateScalar<v8i16, VREPH, 3>;
+def : VectorReplicateScalar<v4i32, VREPF, 1>;
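+
+// e.g. (sketch) replicating a GR32 into v8i16:
+//   vlvgp %v0, %r2, %r2      # scalar in the low half of each doubleword
+//   vreph %v24, %v0, 3       # halfword 3 = low 16 bits of the scalar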
+
+// i64 replications take just a single instruction.
+def : Pat<(v2i64 (z_replicate GR64:$scalar)),
+ (VLVGP GR64:$scalar, GR64:$scalar)>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point insertion and extraction
+//===----------------------------------------------------------------------===//
+
+// Moving 32-bit values between GPRs and FPRs can be done using VLVGF
+// and VLGVF.
+let Predicates = [FeatureVector] in {
+ def LEFR : UnaryAliasVRS<VR32, GR32>;
+ def LFER : UnaryAliasVRS<GR64, VR32>;
+ def : Pat<(f32 (bitconvert (i32 GR32:$src))), (LEFR GR32:$src)>;
+ def : Pat<(i32 (bitconvert (f32 VR32:$src))),
+ (EXTRACT_SUBREG (LFER VR32:$src), subreg_l32)>;
+}
+
+// Floating-point values are stored in element 0 of the corresponding
+// vector register. Scalar to vector conversion is just a subreg and
+// scalar replication can just replicate element 0 of the vector register.
+multiclass ScalarToVectorFP<Instruction vrep, ValueType vt, RegisterOperand cls,
+ SubRegIndex subreg> {
+ def : Pat<(vt (scalar_to_vector cls:$scalar)),
+ (INSERT_SUBREG (vt (IMPLICIT_DEF)), cls:$scalar, subreg)>;
+ def : Pat<(vt (z_replicate cls:$scalar)),
+ (vrep (INSERT_SUBREG (vt (IMPLICIT_DEF)), cls:$scalar,
+ subreg), 0)>;
+}
+defm : ScalarToVectorFP<VREPF, v4f32, FP32, subreg_r32>;
+defm : ScalarToVectorFP<VREPG, v2f64, FP64, subreg_r64>;
+
+// Match v2f64 insertions. The AddedComplexity counters the 3 added by
+// TableGen for the base register operand in VLVG-based integer insertions
+// and ensures that this version is strictly better.
+let AddedComplexity = 4 in {
+ def : Pat<(z_vector_insert (v2f64 VR128:$vec), FP64:$elt, 0),
+ (VPDI (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FP64:$elt,
+ subreg_r64), VR128:$vec, 1)>;
+ def : Pat<(z_vector_insert (v2f64 VR128:$vec), FP64:$elt, 1),
+ (VPDI VR128:$vec, (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FP64:$elt,
+ subreg_r64), 0)>;
+}
+
+// We extract floating-point element X by replicating (for elements other
+// than 0) and then taking a high subreg. The AddedComplexity counters the
+// 3 added by TableGen for the base register operand in VLGV-based integer
+// extractions and ensures that this version is strictly better.
+let AddedComplexity = 4 in {
+ def : Pat<(f32 (z_vector_extract (v4f32 VR128:$vec), 0)),
+ (EXTRACT_SUBREG VR128:$vec, subreg_r32)>;
+ def : Pat<(f32 (z_vector_extract (v4f32 VR128:$vec), imm32zx2:$index)),
+ (EXTRACT_SUBREG (VREPF VR128:$vec, imm32zx2:$index), subreg_r32)>;
+
+ def : Pat<(f64 (z_vector_extract (v2f64 VR128:$vec), 0)),
+ (EXTRACT_SUBREG VR128:$vec, subreg_r64)>;
+ def : Pat<(f64 (z_vector_extract (v2f64 VR128:$vec), imm32zx1:$index)),
+ (EXTRACT_SUBREG (VREPG VR128:$vec, imm32zx1:$index), subreg_r64)>;
+}
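+
+// e.g. (sketch) extracting element 1 of a v2f64:
+//   vrepg %v0, %v24, 1       # replicate doubleword 1 across the vector
+// after which the f64 result is simply the subreg_r64 (element 0) of %v0.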
+
+//===----------------------------------------------------------------------===//
+// String instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureVector] in {
+ defm VFAE : TernaryOptVRRbSPairGeneric<"vfae", 0xE782>;
+ defm VFAEB : TernaryOptVRRbSPair<"vfaeb", 0xE782, int_s390_vfaeb,
+ z_vfae_cc, v128b, v128b, 0>;
+ defm VFAEH : TernaryOptVRRbSPair<"vfaeh", 0xE782, int_s390_vfaeh,
+ z_vfae_cc, v128h, v128h, 1>;
+ defm VFAEF : TernaryOptVRRbSPair<"vfaef", 0xE782, int_s390_vfaef,
+ z_vfae_cc, v128f, v128f, 2>;
+ defm VFAEZB : TernaryOptVRRbSPair<"vfaezb", 0xE782, int_s390_vfaezb,
+ z_vfaez_cc, v128b, v128b, 0, 2>;
+ defm VFAEZH : TernaryOptVRRbSPair<"vfaezh", 0xE782, int_s390_vfaezh,
+ z_vfaez_cc, v128h, v128h, 1, 2>;
+ defm VFAEZF : TernaryOptVRRbSPair<"vfaezf", 0xE782, int_s390_vfaezf,
+ z_vfaez_cc, v128f, v128f, 2, 2>;
+
+ defm VFEE : BinaryExtraVRRbSPairGeneric<"vfee", 0xE780>;
+ defm VFEEB : BinaryExtraVRRbSPair<"vfeeb", 0xE780, int_s390_vfeeb,
+ z_vfee_cc, v128b, v128b, 0>;
+ defm VFEEH : BinaryExtraVRRbSPair<"vfeeh", 0xE780, int_s390_vfeeh,
+ z_vfee_cc, v128h, v128h, 1>;
+ defm VFEEF : BinaryExtraVRRbSPair<"vfeef", 0xE780, int_s390_vfeef,
+ z_vfee_cc, v128f, v128f, 2>;
+ defm VFEEZB : BinaryVRRbSPair<"vfeezb", 0xE780, int_s390_vfeezb,
+ z_vfeez_cc, v128b, v128b, 0, 2>;
+ defm VFEEZH : BinaryVRRbSPair<"vfeezh", 0xE780, int_s390_vfeezh,
+ z_vfeez_cc, v128h, v128h, 1, 2>;
+ defm VFEEZF : BinaryVRRbSPair<"vfeezf", 0xE780, int_s390_vfeezf,
+ z_vfeez_cc, v128f, v128f, 2, 2>;
+
+ defm VFENE : BinaryExtraVRRbSPairGeneric<"vfene", 0xE781>;
+ defm VFENEB : BinaryExtraVRRbSPair<"vfeneb", 0xE781, int_s390_vfeneb,
+ z_vfene_cc, v128b, v128b, 0>;
+ defm VFENEH : BinaryExtraVRRbSPair<"vfeneh", 0xE781, int_s390_vfeneh,
+ z_vfene_cc, v128h, v128h, 1>;
+ defm VFENEF : BinaryExtraVRRbSPair<"vfenef", 0xE781, int_s390_vfenef,
+ z_vfene_cc, v128f, v128f, 2>;
+ defm VFENEZB : BinaryVRRbSPair<"vfenezb", 0xE781, int_s390_vfenezb,
+ z_vfenez_cc, v128b, v128b, 0, 2>;
+ defm VFENEZH : BinaryVRRbSPair<"vfenezh", 0xE781, int_s390_vfenezh,
+ z_vfenez_cc, v128h, v128h, 1, 2>;
+ defm VFENEZF : BinaryVRRbSPair<"vfenezf", 0xE781, int_s390_vfenezf,
+ z_vfenez_cc, v128f, v128f, 2, 2>;
+
+ defm VISTR : UnaryExtraVRRaSPairGeneric<"vistr", 0xE75C>;
+ defm VISTRB : UnaryExtraVRRaSPair<"vistrb", 0xE75C, int_s390_vistrb,
+ z_vistr_cc, v128b, v128b, 0>;
+ defm VISTRH : UnaryExtraVRRaSPair<"vistrh", 0xE75C, int_s390_vistrh,
+ z_vistr_cc, v128h, v128h, 1>;
+ defm VISTRF : UnaryExtraVRRaSPair<"vistrf", 0xE75C, int_s390_vistrf,
+ z_vistr_cc, v128f, v128f, 2>;
+
+ defm VSTRC : QuaternaryOptVRRdSPairGeneric<"vstrc", 0xE78A>;
+ defm VSTRCB : QuaternaryOptVRRdSPair<"vstrcb", 0xE78A, int_s390_vstrcb,
+ z_vstrc_cc, v128b, v128b, 0>;
+ defm VSTRCH : QuaternaryOptVRRdSPair<"vstrch", 0xE78A, int_s390_vstrch,
+ z_vstrc_cc, v128h, v128h, 1>;
+ defm VSTRCF : QuaternaryOptVRRdSPair<"vstrcf", 0xE78A, int_s390_vstrcf,
+ z_vstrc_cc, v128f, v128f, 2>;
+ defm VSTRCZB : QuaternaryOptVRRdSPair<"vstrczb", 0xE78A, int_s390_vstrczb,
+ z_vstrcz_cc, v128b, v128b, 0, 2>;
+ defm VSTRCZH : QuaternaryOptVRRdSPair<"vstrczh", 0xE78A, int_s390_vstrczh,
+ z_vstrcz_cc, v128h, v128h, 1, 2>;
+ defm VSTRCZF : QuaternaryOptVRRdSPair<"vstrczf", 0xE78A, int_s390_vstrczf,
+ z_vstrcz_cc, v128f, v128f, 2, 2>;
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp
new file mode 100644
index 000000000000..3a0e01da42f0
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp
@@ -0,0 +1,146 @@
+//===-- SystemZLDCleanup.cpp - Clean up local-dynamic TLS accesses --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass combines multiple accesses to local-dynamic TLS variables so that
+// the TLS base address for the module is only fetched once per execution path
+// through the function.
+//
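+// For example (illustrative only): when two local-dynamic accesses each
+// expand to a TLS_LDCALL that returns the base address in %r2, the pass
+// keeps the first call, caches its %r2 result in a virtual register, and
+// rewrites any dominated TLS_LDCALL into a plain COPY from that register.
+//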
+//===----------------------------------------------------------------------===//
+
+#include "SystemZMachineFunctionInfo.h"
+#include "SystemZTargetMachine.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+class SystemZLDCleanup : public MachineFunctionPass {
+public:
+ static char ID;
+ SystemZLDCleanup(const SystemZTargetMachine &tm)
+ : MachineFunctionPass(ID), TII(nullptr), MF(nullptr) {}
+
+ StringRef getPassName() const override {
+ return "SystemZ Local Dynamic TLS Access Clean-up";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+private:
+ bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg);
+ MachineInstr *ReplaceTLSCall(MachineInstr *I, unsigned TLSBaseAddrReg);
+ MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg);
+
+ const SystemZInstrInfo *TII;
+ MachineFunction *MF;
+};
+
+char SystemZLDCleanup::ID = 0;
+
+} // end anonymous namespace
+
+FunctionPass *llvm::createSystemZLDCleanupPass(SystemZTargetMachine &TM) {
+ return new SystemZLDCleanup(TM);
+}
+
+void SystemZLDCleanup::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool SystemZLDCleanup::runOnMachineFunction(MachineFunction &F) {
+ if (skipFunction(*F.getFunction()))
+ return false;
+
+ TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo());
+ MF = &F;
+
+ SystemZMachineFunctionInfo *MFI = F.getInfo<SystemZMachineFunctionInfo>();
+ if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
+ // No point folding accesses if there aren't at least two.
+ return false;
+ }
+
+ MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
+ return VisitNode(DT->getRootNode(), 0);
+}
+
+// Visit the dominator subtree rooted at Node in pre-order.
+// If TLSBaseAddrReg is nonzero, use that register to replace any
+// TLS_LDCALL instructions. Otherwise, create the register
+// when the first such instruction is seen, and then use it
+// as we encounter more instructions.
+bool SystemZLDCleanup::VisitNode(MachineDomTreeNode *Node,
+ unsigned TLSBaseAddrReg) {
+ MachineBasicBlock *BB = Node->getBlock();
+ bool Changed = false;
+
+ // Traverse the current block.
+ for (auto I = BB->begin(), E = BB->end(); I != E; ++I) {
+ switch (I->getOpcode()) {
+ case SystemZ::TLS_LDCALL:
+ if (TLSBaseAddrReg)
+ I = ReplaceTLSCall(&*I, TLSBaseAddrReg);
+ else
+ I = SetRegister(&*I, &TLSBaseAddrReg);
+ Changed = true;
+ break;
+ default:
+ break;
+ }
+ }
+
+ // Visit the children of this block in the dominator tree.
+ for (auto I = Node->begin(), E = Node->end(); I != E; ++I)
+ Changed |= VisitNode(*I, TLSBaseAddrReg);
+
+ return Changed;
+}
+
+// Replace the TLS_LDCALL instruction I with a copy from TLSBaseAddrReg,
+// returning the new instruction.
+MachineInstr *SystemZLDCleanup::ReplaceTLSCall(MachineInstr *I,
+ unsigned TLSBaseAddrReg) {
+ // Insert a Copy from TLSBaseAddrReg to R2.
+ MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), SystemZ::R2D)
+ .addReg(TLSBaseAddrReg);
+
+ // Erase the TLS_LDCALL instruction.
+ I->eraseFromParent();
+
+ return Copy;
+}
+
+// Create a virtual register in *TLSBaseAddrReg, and populate it by
+// inserting a copy instruction after I. Returns the new instruction.
+MachineInstr *SystemZLDCleanup::SetRegister(MachineInstr *I,
+ unsigned *TLSBaseAddrReg) {
+ // Create a virtual register for the TLS base address.
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ *TLSBaseAddrReg = RegInfo.createVirtualRegister(&SystemZ::GR64BitRegClass);
+
+ // Insert a copy from R2 to TLSBaseAddrReg.
+ MachineInstr *Next = I->getNextNode();
+ MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), *TLSBaseAddrReg)
+ .addReg(SystemZ::R2D);
+
+ return Copy;
+}
+
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
new file mode 100644
index 000000000000..791f0334e0f1
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
@@ -0,0 +1,472 @@
+//===-- SystemZLongBranch.cpp - Branch lengthening for SystemZ ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass makes sure that all branches are in range. There are several ways
+// in which this could be done. One aggressive approach is to assume that all
+// branches are in range and successively replace those that turn out not
+// to be in range with a longer form (branch relaxation). A simple
+// implementation is to continually walk through the function relaxing
+// branches until no more changes are needed and a fixed point is reached.
+// However, in the pathological worst case, this implementation is
+// quadratic in the number of blocks; relaxing branch N can make branch N-1
+// go out of range, which in turn can make branch N-2 go out of range,
+// and so on.
+//
+// An alternative approach is to assume that all branches must be
+// converted to their long forms, then reinstate the short forms of
+// branches that, even under this pessimistic assumption, turn out to be
+// in range (branch shortening). This too can be implemented as a function
+// walk that is repeated until a fixed point is reached. In general,
+// the result of shortening is not as good as that of relaxation, and
+// shortening is also quadratic in the worst case; shortening branch N
+// can bring branch N-1 in range of the short form, which in turn can do
+// the same for branch N-2, and so on. The main advantage of shortening
+// is that each walk through the function produces valid code, so it is
+// possible to stop at any point after the first walk. The quadratic
+// behavior could therefore be handled with a maximum pass count, although
+// the question then becomes: what maximum count should be used?
+//
+// On SystemZ, long branches are only needed for functions bigger than 64k,
+// which are relatively rare to begin with, and the long branch sequences
+// are actually relatively cheap. It therefore doesn't seem worth spending
+// much compilation time on the problem. Instead, the approach we take is:
+//
+// (1) Work out the address that each block would have if no branches
+// need relaxing. Exit the pass early if all branches are in range
+// according to this assumption.
+//
+// (2) Work out the address that each block would have if all branches
+// need relaxing.
+//
+// (3) Walk through the blocks calculating the final address of each instruction
+// and relaxing those that need to be relaxed. For backward branches,
+// this check uses the final address of the target block, as calculated
+// earlier in the walk. For forward branches, this check uses the
+// address of the target block that was calculated in (2). Both checks
+// give a conservatively-correct range.
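+//
+// As a small worked example (addresses are illustrative): suppose the walk
+// in (3) reaches a forward branch at address 0x100 whose target block was
+// given the worst-case address 0x10200 in (2). Since 0x10200 - 0x100
+// exceeds MaxForwardRange (0xfffe), the branch is relaxed. Had the
+// worst-case distance been in range, the short form would be safe, because
+// the target's final address can only be lower than that estimate.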
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZTargetMachine.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <cstdint>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "systemz-long-branch"
+
+STATISTIC(LongBranches, "Number of long branches.");
+
+namespace {
+
+// Represents positional information about a basic block.
+struct MBBInfo {
+ // The address that we currently assume the block has.
+ uint64_t Address = 0;
+
+ // The size of the block in bytes, excluding terminators.
+ // This value never changes.
+ uint64_t Size = 0;
+
+ // The minimum alignment of the block, as a log2 value.
+ // This value never changes.
+ unsigned Alignment = 0;
+
+ // The number of terminators in this block. This value never changes.
+ unsigned NumTerminators = 0;
+
+ MBBInfo() = default;
+};
+
+// Represents the state of a block terminator.
+struct TerminatorInfo {
+ // If this terminator is a relaxable branch, this points to the branch
+ // instruction, otherwise it is null.
+ MachineInstr *Branch = nullptr;
+
+ // The address that we currently assume the terminator has.
+ uint64_t Address = 0;
+
+ // The current size of the terminator in bytes.
+ uint64_t Size = 0;
+
+ // If Branch is nonnull, this is the number of the target block,
+ // otherwise it is unused.
+ unsigned TargetBlock = 0;
+
+ // If Branch is nonnull, this is the length of the longest relaxed form,
+ // otherwise it is zero.
+ unsigned ExtraRelaxSize = 0;
+
+ TerminatorInfo() = default;
+};
+
+// Used to keep track of the current position while iterating over the blocks.
+struct BlockPosition {
+ // The address that we assume this position has.
+ uint64_t Address = 0;
+
+ // The number of low bits in Address that are known to be the same
+ // as the runtime address.
+ unsigned KnownBits;
+
+ BlockPosition(unsigned InitialAlignment) : KnownBits(InitialAlignment) {}
+};
+
+class SystemZLongBranch : public MachineFunctionPass {
+public:
+ static char ID;
+
+ SystemZLongBranch(const SystemZTargetMachine &tm)
+ : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override { return "SystemZ Long Branch"; }
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+private:
+ void skipNonTerminators(BlockPosition &Position, MBBInfo &Block);
+ void skipTerminator(BlockPosition &Position, TerminatorInfo &Terminator,
+ bool AssumeRelaxed);
+ TerminatorInfo describeTerminator(MachineInstr &MI);
+ uint64_t initMBBInfo();
+ bool mustRelaxBranch(const TerminatorInfo &Terminator, uint64_t Address);
+ bool mustRelaxABranch();
+ void setWorstCaseAddresses();
+ void splitBranchOnCount(MachineInstr *MI, unsigned AddOpcode);
+ void splitCompareBranch(MachineInstr *MI, unsigned CompareOpcode);
+ void relaxBranch(TerminatorInfo &Terminator);
+ void relaxBranches();
+
+ const SystemZInstrInfo *TII = nullptr;
+ MachineFunction *MF;
+ SmallVector<MBBInfo, 16> MBBs;
+ SmallVector<TerminatorInfo, 16> Terminators;
+};
+
+char SystemZLongBranch::ID = 0;
+
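+// The short branch forms handled here use a 16-bit signed offset that is
+// counted in halfwords (see brtarget16 in SystemZOperands.td, where the
+// offset is sign-extended and multiplied by 2), which gives the byte
+// ranges below.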
+const uint64_t MaxBackwardRange = 0x10000;
+const uint64_t MaxForwardRange = 0xfffe;
+
+} // end anonymous namespace
+
+// Position describes the state immediately before Block. Update Block
+// accordingly and move Position to the end of the block's non-terminator
+// instructions.
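+//
+// For example (hypothetical values): if Position.KnownBits is 2 (the
+// address is only known modulo 4) and Block.Alignment is 3 (8-byte
+// alignment), the runtime address may need up to 8 - 4 = 4 bytes of
+// padding more than the aligned value of Position.Address suggests,
+// so 4 is added before aligning.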
+void SystemZLongBranch::skipNonTerminators(BlockPosition &Position,
+ MBBInfo &Block) {
+ if (Block.Alignment > Position.KnownBits) {
+ // When calculating the address of Block, we need to conservatively
+ // assume that Block had the worst possible misalignment.
+ Position.Address += ((uint64_t(1) << Block.Alignment) -
+ (uint64_t(1) << Position.KnownBits));
+ Position.KnownBits = Block.Alignment;
+ }
+
+ // Align the addresses.
+ uint64_t AlignMask = (uint64_t(1) << Block.Alignment) - 1;
+ Position.Address = (Position.Address + AlignMask) & ~AlignMask;
+
+ // Record the block's position.
+ Block.Address = Position.Address;
+
+ // Move past the non-terminators in the block.
+ Position.Address += Block.Size;
+}
+
+// Position describes the state immediately before Terminator.
+// Update Terminator accordingly and move Position past it.
+// Assume that Terminator will be relaxed if AssumeRelaxed.
+void SystemZLongBranch::skipTerminator(BlockPosition &Position,
+ TerminatorInfo &Terminator,
+ bool AssumeRelaxed) {
+ Terminator.Address = Position.Address;
+ Position.Address += Terminator.Size;
+ if (AssumeRelaxed)
+ Position.Address += Terminator.ExtraRelaxSize;
+}
+
+// Return a description of terminator instruction MI.
+TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr &MI) {
+ TerminatorInfo Terminator;
+ Terminator.Size = TII->getInstSizeInBytes(MI);
+ if (MI.isConditionalBranch() || MI.isUnconditionalBranch()) {
+ switch (MI.getOpcode()) {
+ case SystemZ::J:
+ // Relaxes to JG, which is 2 bytes longer.
+ Terminator.ExtraRelaxSize = 2;
+ break;
+ case SystemZ::BRC:
+ // Relaxes to BRCL, which is 2 bytes longer.
+ Terminator.ExtraRelaxSize = 2;
+ break;
+ case SystemZ::BRCT:
+ case SystemZ::BRCTG:
+ // Relaxes to A(G)HI and BRCL, which is 6 bytes longer.
+ Terminator.ExtraRelaxSize = 6;
+ break;
+ case SystemZ::BRCTH:
+ // Never needs to be relaxed.
+ Terminator.ExtraRelaxSize = 0;
+ break;
+ case SystemZ::CRJ:
+ case SystemZ::CLRJ:
+ // Relaxes to a C(L)R/BRCL sequence, which is 2 bytes longer.
+ Terminator.ExtraRelaxSize = 2;
+ break;
+ case SystemZ::CGRJ:
+ case SystemZ::CLGRJ:
+ // Relaxes to a C(L)GR/BRCL sequence, which is 4 bytes longer.
+ Terminator.ExtraRelaxSize = 4;
+ break;
+ case SystemZ::CIJ:
+ case SystemZ::CGIJ:
+ // Relaxes to a C(G)HI/BRCL sequence, which is 4 bytes longer.
+ Terminator.ExtraRelaxSize = 4;
+ break;
+ case SystemZ::CLIJ:
+ case SystemZ::CLGIJ:
+ // Relaxes to a CL(G)FI/BRCL sequence, which is 6 bytes longer.
+ Terminator.ExtraRelaxSize = 6;
+ break;
+ default:
+ llvm_unreachable("Unrecognized branch instruction");
+ }
+ Terminator.Branch = &MI;
+ Terminator.TargetBlock =
+ TII->getBranchInfo(MI).Target->getMBB()->getNumber();
+ }
+ return Terminator;
+}
+
+// Fill MBBs and Terminators, setting the addresses on the assumption
+// that no branches need relaxation. Return the size of the function under
+// this assumption.
+uint64_t SystemZLongBranch::initMBBInfo() {
+ MF->RenumberBlocks();
+ unsigned NumBlocks = MF->size();
+
+ MBBs.clear();
+ MBBs.resize(NumBlocks);
+
+ Terminators.clear();
+ Terminators.reserve(NumBlocks);
+
+ BlockPosition Position(MF->getAlignment());
+ for (unsigned I = 0; I < NumBlocks; ++I) {
+ MachineBasicBlock *MBB = MF->getBlockNumbered(I);
+ MBBInfo &Block = MBBs[I];
+
+ // Record the alignment, for quick access.
+ Block.Alignment = MBB->getAlignment();
+
+ // Calculate the size of the fixed part of the block.
+ MachineBasicBlock::iterator MI = MBB->begin();
+ MachineBasicBlock::iterator End = MBB->end();
+ while (MI != End && !MI->isTerminator()) {
+ Block.Size += TII->getInstSizeInBytes(*MI);
+ ++MI;
+ }
+ skipNonTerminators(Position, Block);
+
+ // Add the terminators.
+ while (MI != End) {
+ if (!MI->isDebugValue()) {
+ assert(MI->isTerminator() && "Terminator followed by non-terminator");
+ Terminators.push_back(describeTerminator(*MI));
+ skipTerminator(Position, Terminators.back(), false);
+ ++Block.NumTerminators;
+ }
+ ++MI;
+ }
+ }
+
+ return Position.Address;
+}
+
+// Return true if, under current assumptions, Terminator would need to be
+// relaxed if it were placed at address Address.
+bool SystemZLongBranch::mustRelaxBranch(const TerminatorInfo &Terminator,
+ uint64_t Address) {
+ if (!Terminator.Branch)
+ return false;
+
+ const MBBInfo &Target = MBBs[Terminator.TargetBlock];
+ if (Address >= Target.Address) {
+ if (Address - Target.Address <= MaxBackwardRange)
+ return false;
+ } else {
+ if (Target.Address - Address <= MaxForwardRange)
+ return false;
+ }
+
+ return true;
+}
+
+// Return true if, under current assumptions, any terminator needs
+// to be relaxed.
+bool SystemZLongBranch::mustRelaxABranch() {
+ for (auto &Terminator : Terminators)
+ if (mustRelaxBranch(Terminator, Terminator.Address))
+ return true;
+ return false;
+}
+
+// Set the address of each block on the assumption that all branches
+// must be long.
+void SystemZLongBranch::setWorstCaseAddresses() {
+ SmallVector<TerminatorInfo, 16>::iterator TI = Terminators.begin();
+ BlockPosition Position(MF->getAlignment());
+ for (auto &Block : MBBs) {
+ skipNonTerminators(Position, Block);
+ for (unsigned BTI = 0, BTE = Block.NumTerminators; BTI != BTE; ++BTI) {
+ skipTerminator(Position, *TI, true);
+ ++TI;
+ }
+ }
+}
+
+// Split BRANCH ON COUNT MI into the addition given by AddOpcode followed
+// by a BRCL on the result.
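+//
+// For example, "BRCT %r2, label" conceptually becomes:
+//   AHI  %r2, -1
+//   BRCL NE, label    (branch while the decremented value is nonzero)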
+void SystemZLongBranch::splitBranchOnCount(MachineInstr *MI,
+ unsigned AddOpcode) {
+ MachineBasicBlock *MBB = MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ BuildMI(*MBB, MI, DL, TII->get(AddOpcode))
+ .add(MI->getOperand(0))
+ .add(MI->getOperand(1))
+ .addImm(-1);
+ MachineInstr *BRCL = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BRCL))
+ .addImm(SystemZ::CCMASK_ICMP)
+ .addImm(SystemZ::CCMASK_CMP_NE)
+ .add(MI->getOperand(2));
+ // The implicit use of CC is a killing use.
+ BRCL->addRegisterKilled(SystemZ::CC, &TII->getRegisterInfo());
+ MI->eraseFromParent();
+}
+
+// Split MI into the comparison given by CompareOpcode followed by
+// a BRCL on the result.
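+//
+// For example, "CRJ %r2, %r3, mask, label" conceptually becomes:
+//   CR   %r2, %r3
+//   BRCL mask, label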
+void SystemZLongBranch::splitCompareBranch(MachineInstr *MI,
+ unsigned CompareOpcode) {
+ MachineBasicBlock *MBB = MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ BuildMI(*MBB, MI, DL, TII->get(CompareOpcode))
+ .add(MI->getOperand(0))
+ .add(MI->getOperand(1));
+ MachineInstr *BRCL = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BRCL))
+ .addImm(SystemZ::CCMASK_ICMP)
+ .add(MI->getOperand(2))
+ .add(MI->getOperand(3));
+ // The implicit use of CC is a killing use.
+ BRCL->addRegisterKilled(SystemZ::CC, &TII->getRegisterInfo());
+ MI->eraseFromParent();
+}
+
+// Relax the branch described by Terminator.
+void SystemZLongBranch::relaxBranch(TerminatorInfo &Terminator) {
+ MachineInstr *Branch = Terminator.Branch;
+ switch (Branch->getOpcode()) {
+ case SystemZ::J:
+ Branch->setDesc(TII->get(SystemZ::JG));
+ break;
+ case SystemZ::BRC:
+ Branch->setDesc(TII->get(SystemZ::BRCL));
+ break;
+ case SystemZ::BRCT:
+ splitBranchOnCount(Branch, SystemZ::AHI);
+ break;
+ case SystemZ::BRCTG:
+ splitBranchOnCount(Branch, SystemZ::AGHI);
+ break;
+ case SystemZ::CRJ:
+ splitCompareBranch(Branch, SystemZ::CR);
+ break;
+ case SystemZ::CGRJ:
+ splitCompareBranch(Branch, SystemZ::CGR);
+ break;
+ case SystemZ::CIJ:
+ splitCompareBranch(Branch, SystemZ::CHI);
+ break;
+ case SystemZ::CGIJ:
+ splitCompareBranch(Branch, SystemZ::CGHI);
+ break;
+ case SystemZ::CLRJ:
+ splitCompareBranch(Branch, SystemZ::CLR);
+ break;
+ case SystemZ::CLGRJ:
+ splitCompareBranch(Branch, SystemZ::CLGR);
+ break;
+ case SystemZ::CLIJ:
+ splitCompareBranch(Branch, SystemZ::CLFI);
+ break;
+ case SystemZ::CLGIJ:
+ splitCompareBranch(Branch, SystemZ::CLGFI);
+ break;
+ default:
+ llvm_unreachable("Unrecognized branch");
+ }
+
+ Terminator.Size += Terminator.ExtraRelaxSize;
+ Terminator.ExtraRelaxSize = 0;
+ Terminator.Branch = nullptr;
+
+ ++LongBranches;
+}
+
+// Run a shortening pass and relax any branches that need to be relaxed.
+void SystemZLongBranch::relaxBranches() {
+ SmallVector<TerminatorInfo, 16>::iterator TI = Terminators.begin();
+ BlockPosition Position(MF->getAlignment());
+ for (auto &Block : MBBs) {
+ skipNonTerminators(Position, Block);
+ for (unsigned BTI = 0, BTE = Block.NumTerminators; BTI != BTE; ++BTI) {
+ assert(Position.Address <= TI->Address &&
+ "Addresses shouldn't go forwards");
+ if (mustRelaxBranch(*TI, Position.Address))
+ relaxBranch(*TI);
+ skipTerminator(Position, *TI, false);
+ ++TI;
+ }
+ }
+}
+
+bool SystemZLongBranch::runOnMachineFunction(MachineFunction &F) {
+ TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo());
+ MF = &F;
+ uint64_t Size = initMBBInfo();
+ if (Size <= MaxForwardRange || !mustRelaxABranch())
+ return false;
+
+ setWorstCaseAddresses();
+ relaxBranches();
+ return true;
+}
+
+FunctionPass *llvm::createSystemZLongBranchPass(SystemZTargetMachine &TM) {
+ return new SystemZLongBranch(TM);
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp
new file mode 100644
index 000000000000..2655e4866b20
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp
@@ -0,0 +1,103 @@
+//===-- SystemZMCInstLower.cpp - Lower MachineInstr to MCInst -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZMCInstLower.h"
+#include "SystemZAsmPrinter.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+
+using namespace llvm;
+
+// Return the VK_* enumeration for MachineOperand target flags Flags.
+static MCSymbolRefExpr::VariantKind getVariantKind(unsigned Flags) {
+ switch (Flags & SystemZII::MO_SYMBOL_MODIFIER) {
+ case 0:
+ return MCSymbolRefExpr::VK_None;
+ case SystemZII::MO_GOT:
+ return MCSymbolRefExpr::VK_GOT;
+ case SystemZII::MO_INDNTPOFF:
+ return MCSymbolRefExpr::VK_INDNTPOFF;
+ }
+  llvm_unreachable("Unrecognized MO_SYMBOL_MODIFIER");
+}
+
+SystemZMCInstLower::SystemZMCInstLower(MCContext &ctx,
+ SystemZAsmPrinter &asmprinter)
+ : Ctx(ctx), AsmPrinter(asmprinter) {}
+
+const MCExpr *
+SystemZMCInstLower::getExpr(const MachineOperand &MO,
+ MCSymbolRefExpr::VariantKind Kind) const {
+ const MCSymbol *Symbol;
+ bool HasOffset = true;
+ switch (MO.getType()) {
+ case MachineOperand::MO_MachineBasicBlock:
+ Symbol = MO.getMBB()->getSymbol();
+ HasOffset = false;
+ break;
+
+ case MachineOperand::MO_GlobalAddress:
+ Symbol = AsmPrinter.getSymbol(MO.getGlobal());
+ break;
+
+ case MachineOperand::MO_ExternalSymbol:
+ Symbol = AsmPrinter.GetExternalSymbolSymbol(MO.getSymbolName());
+ break;
+
+ case MachineOperand::MO_JumpTableIndex:
+ Symbol = AsmPrinter.GetJTISymbol(MO.getIndex());
+ HasOffset = false;
+ break;
+
+ case MachineOperand::MO_ConstantPoolIndex:
+ Symbol = AsmPrinter.GetCPISymbol(MO.getIndex());
+ break;
+
+ case MachineOperand::MO_BlockAddress:
+ Symbol = AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress());
+ break;
+
+ default:
+ llvm_unreachable("unknown operand type");
+ }
+ const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, Kind, Ctx);
+ if (HasOffset)
+ if (int64_t Offset = MO.getOffset()) {
+ const MCExpr *OffsetExpr = MCConstantExpr::create(Offset, Ctx);
+ Expr = MCBinaryExpr::createAdd(Expr, OffsetExpr, Ctx);
+ }
+ return Expr;
+}
+
+MCOperand SystemZMCInstLower::lowerOperand(const MachineOperand &MO) const {
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ return MCOperand::createReg(MO.getReg());
+
+ case MachineOperand::MO_Immediate:
+ return MCOperand::createImm(MO.getImm());
+
+ default: {
+ MCSymbolRefExpr::VariantKind Kind = getVariantKind(MO.getTargetFlags());
+ return MCOperand::createExpr(getExpr(MO, Kind));
+ }
+ }
+}
+
+void SystemZMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
+ OutMI.setOpcode(MI->getOpcode());
+ for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI->getOperand(I);
+ // Ignore all implicit register operands.
+ if (!MO.isReg() || !MO.isImplicit())
+ OutMI.addOperand(lowerOperand(MO));
+ }
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.h b/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.h
new file mode 100644
index 000000000000..7173cfa42959
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.h
@@ -0,0 +1,44 @@
+//===-- SystemZMCInstLower.h - Lower MachineInstr to MCInst ----*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMCINSTLOWER_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMCINSTLOWER_H
+
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+class MCInst;
+class MCOperand;
+class MachineInstr;
+class MachineOperand;
+class Mangler;
+class SystemZAsmPrinter;
+
+class LLVM_LIBRARY_VISIBILITY SystemZMCInstLower {
+ MCContext &Ctx;
+ SystemZAsmPrinter &AsmPrinter;
+
+public:
+ SystemZMCInstLower(MCContext &ctx, SystemZAsmPrinter &asmPrinter);
+
+ // Lower MachineInstr MI to MCInst OutMI.
+ void lower(const MachineInstr *MI, MCInst &OutMI) const;
+
+ // Return an MCOperand for MO.
+ MCOperand lowerOperand(const MachineOperand& MO) const;
+
+ // Return an MCExpr for symbolic operand MO with variant kind Kind.
+ const MCExpr *getExpr(const MachineOperand &MO,
+ MCSymbolRefExpr::VariantKind Kind) const;
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp
new file mode 100644
index 000000000000..1a7c0d7f687a
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp
@@ -0,0 +1,17 @@
+//=== SystemZMachineFunctionInfo.cpp - SystemZ machine function info ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZMachineFunctionInfo.h"
+
+using namespace llvm;
+
+// Pin the vtable to this file.
+void SystemZMachineFunctionInfo::anchor() {}
+
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
new file mode 100644
index 000000000000..4f64f4c65f1d
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -0,0 +1,79 @@
+//=== SystemZMachineFunctionInfo.h - SystemZ machine function info -*- C++ -*-//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+class SystemZMachineFunctionInfo : public MachineFunctionInfo {
+ virtual void anchor();
+ unsigned LowSavedGPR;
+ unsigned HighSavedGPR;
+ unsigned VarArgsFirstGPR;
+ unsigned VarArgsFirstFPR;
+ unsigned VarArgsFrameIndex;
+ unsigned RegSaveFrameIndex;
+ int FramePointerSaveIndex;
+ bool ManipulatesSP;
+ unsigned NumLocalDynamics;
+
+public:
+ explicit SystemZMachineFunctionInfo(MachineFunction &MF)
+ : LowSavedGPR(0), HighSavedGPR(0), VarArgsFirstGPR(0), VarArgsFirstFPR(0),
+ VarArgsFrameIndex(0), RegSaveFrameIndex(0), FramePointerSaveIndex(0),
+ ManipulatesSP(false), NumLocalDynamics(0) {}
+
+ // Get and set the first call-saved GPR that should be saved and restored
+ // by this function. This is 0 if no GPRs need to be saved or restored.
+ unsigned getLowSavedGPR() const { return LowSavedGPR; }
+ void setLowSavedGPR(unsigned Reg) { LowSavedGPR = Reg; }
+
+ // Get and set the last call-saved GPR that should be saved and restored
+ // by this function.
+ unsigned getHighSavedGPR() const { return HighSavedGPR; }
+ void setHighSavedGPR(unsigned Reg) { HighSavedGPR = Reg; }
+
+ // Get and set the number of fixed (as opposed to variable) arguments
+ // that are passed in GPRs to this function.
+ unsigned getVarArgsFirstGPR() const { return VarArgsFirstGPR; }
+ void setVarArgsFirstGPR(unsigned GPR) { VarArgsFirstGPR = GPR; }
+
+ // Likewise FPRs.
+ unsigned getVarArgsFirstFPR() const { return VarArgsFirstFPR; }
+ void setVarArgsFirstFPR(unsigned FPR) { VarArgsFirstFPR = FPR; }
+
+ // Get and set the frame index of the first stack vararg.
+ unsigned getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(unsigned FI) { VarArgsFrameIndex = FI; }
+
+ // Get and set the frame index of the register save area
+ // (i.e. the incoming stack pointer).
+ unsigned getRegSaveFrameIndex() const { return RegSaveFrameIndex; }
+ void setRegSaveFrameIndex(unsigned FI) { RegSaveFrameIndex = FI; }
+
+ // Get and set the frame index of where the old frame pointer is stored.
+ int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
+ void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
+
+ // Get and set whether the function directly manipulates the stack pointer,
+ // e.g. through STACKSAVE or STACKRESTORE.
+ bool getManipulatesSP() const { return ManipulatesSP; }
+ void setManipulatesSP(bool MSP) { ManipulatesSP = MSP; }
+
+ // Count number of local-dynamic TLS symbols used.
+ unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; }
+ void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp
new file mode 100644
index 000000000000..b6feaa49d858
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp
@@ -0,0 +1,153 @@
+//===-- SystemZMachineScheduler.cpp - SystemZ Scheduler Interface -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// -------------------------- Post RA scheduling ---------------------------- //
+// SystemZPostRASchedStrategy is a scheduling strategy which is plugged into
+// the MachineScheduler. It has a sorted Available set of SUs and a pickNode()
+// implementation that looks to optimize decoder grouping and balance the
+// usage of processor resources.
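+//
+// Candidates are compared first by decoder-grouping cost, then by
+// processor-resources cost, then by height, with the original node order
+// as the final tie-breaker (see Candidate::operator< below).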
+//===----------------------------------------------------------------------===//
+
+#include "SystemZMachineScheduler.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "misched"
+
+#ifndef NDEBUG
+// Print the set of SUs.
+void SystemZPostRASchedStrategy::SUSet::
+dump(SystemZHazardRecognizer &HazardRec) const {
+ dbgs() << "{";
+ for (auto &SU : *this) {
+ HazardRec.dumpSU(SU, dbgs());
+ if (SU != *rbegin())
+ dbgs() << ", ";
+ }
+ dbgs() << "}\n";
+}
+#endif
+
+SystemZPostRASchedStrategy::
+SystemZPostRASchedStrategy(const MachineSchedContext *C)
+ : DAG(nullptr), HazardRec(C) {}
+
+void SystemZPostRASchedStrategy::initialize(ScheduleDAGMI *dag) {
+ DAG = dag;
+ HazardRec.setDAG(dag);
+ HazardRec.Reset();
+}
+
+// Pick the next node to schedule.
+SUnit *SystemZPostRASchedStrategy::pickNode(bool &IsTopNode) {
+ // Only scheduling top-down.
+ IsTopNode = true;
+
+ if (Available.empty())
+ return nullptr;
+
+ // If only one choice, return it.
+ if (Available.size() == 1) {
+    DEBUG(dbgs() << "+++ Only one: ";
+          HazardRec.dumpSU(*Available.begin(), dbgs()); dbgs() << "\n";);
+ return *Available.begin();
+ }
+
+  // All nodes that can currently be scheduled are stored in the
+  // Available set.
+ DEBUG(dbgs() << "+++ Available: "; Available.dump(HazardRec););
+
+ Candidate Best;
+ for (auto *SU : Available) {
+
+ // SU is the next candidate to be compared against current Best.
+ Candidate c(SU, HazardRec);
+
+    // Remember which SU is the best candidate.
+ if (Best.SU == nullptr || c < Best) {
+ Best = c;
+      DEBUG(dbgs() << "+++ Best so far: ";
+ HazardRec.dumpSU(Best.SU, dbgs());
+ if (Best.GroupingCost != 0)
+ dbgs() << "\tGrouping cost:" << Best.GroupingCost;
+ if (Best.ResourcesCost != 0)
+ dbgs() << " Resource cost:" << Best.ResourcesCost;
+ dbgs() << " Height:" << Best.SU->getHeight();
+ dbgs() << "\n";);
+ }
+
+ // Once we know we have seen all SUs that affect grouping or use unbuffered
+ // resources, we can stop iterating if Best looks good.
+ if (!SU->isScheduleHigh && Best.noCost())
+ break;
+ }
+
+ assert (Best.SU != nullptr);
+ return Best.SU;
+}
+
+SystemZPostRASchedStrategy::Candidate::
+Candidate(SUnit *SU_, SystemZHazardRecognizer &HazardRec) : Candidate() {
+ SU = SU_;
+
+ // Check the grouping cost. For a node that must begin / end a
+ // group, it is positive if it would do so prematurely, or negative
+ // if it would fit naturally into the schedule.
+ GroupingCost = HazardRec.groupingCost(SU);
+
+ // Check the resources cost for this SU.
+ ResourcesCost = HazardRec.resourcesCost(SU);
+}
+
+bool SystemZPostRASchedStrategy::Candidate::
+operator<(const Candidate &other) {
+
+ // Check decoder grouping.
+ if (GroupingCost < other.GroupingCost)
+ return true;
+ if (GroupingCost > other.GroupingCost)
+ return false;
+
+ // Compare the use of resources.
+ if (ResourcesCost < other.ResourcesCost)
+ return true;
+ if (ResourcesCost > other.ResourcesCost)
+ return false;
+
+ // Higher SU is otherwise generally better.
+ if (SU->getHeight() > other.SU->getHeight())
+ return true;
+ if (SU->getHeight() < other.SU->getHeight())
+ return false;
+
+ // If all same, fall back to original order.
+ if (SU->NodeNum < other.SU->NodeNum)
+ return true;
+
+ return false;
+}
+
+void SystemZPostRASchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
+ DEBUG(dbgs() << "+++ Scheduling SU(" << SU->NodeNum << ")\n";);
+
+ // Remove SU from Available set and update HazardRec.
+ Available.erase(SU);
+ HazardRec.EmitInstruction(SU);
+}
+
+void SystemZPostRASchedStrategy::releaseTopNode(SUnit *SU) {
+ // Set isScheduleHigh flag on all SUs that we want to consider first in
+ // pickNode().
+ const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+ bool AffectsGrouping = (SC->isValid() && (SC->BeginGroup || SC->EndGroup));
+ SU->isScheduleHigh = (AffectsGrouping || SU->isUnbuffered);
+
+ // Put all released SUs in the Available set.
+ Available.insert(SU);
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h b/contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h
new file mode 100644
index 000000000000..3dfef388691e
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h
@@ -0,0 +1,112 @@
+//==- SystemZMachineScheduler.h - SystemZ Scheduler Interface ----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// -------------------------- Post RA scheduling ---------------------------- //
+// SystemZPostRASchedStrategy is a scheduling strategy which is plugged into
+// the MachineScheduler. It has a sorted Available set of SUs and a pickNode()
+// implementation that looks to optimize decoder grouping and balance the
+// usage of processor resources.
+//===----------------------------------------------------------------------===//
+
+#include "SystemZHazardRecognizer.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include <set>
+
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINESCHEDULER_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINESCHEDULER_H
+
+using namespace llvm;
+
+namespace llvm {
+
+/// A MachineSchedStrategy implementation for SystemZ post RA scheduling.
+class SystemZPostRASchedStrategy : public MachineSchedStrategy {
+ ScheduleDAGMI *DAG;
+
+ /// A candidate during instruction evaluation.
+ struct Candidate {
+ SUnit *SU = nullptr;
+
+ /// The decoding cost.
+ int GroupingCost = 0;
+
+ /// The processor resources cost.
+ int ResourcesCost = 0;
+
+ Candidate() = default;
+ Candidate(SUnit *SU_, SystemZHazardRecognizer &HazardRec);
+
+ // Compare two candidates.
+ bool operator<(const Candidate &other);
+
+ // Check if this node is free of cost ("as good as any").
+ bool noCost() const {
+ return (GroupingCost <= 0 && !ResourcesCost);
+ }
+ };
+
+ // A sorter for the Available set that makes sure that SUs are considered
+ // in the best order.
+ struct SUSorter {
+ bool operator() (SUnit *lhs, SUnit *rhs) const {
+ if (lhs->isScheduleHigh && !rhs->isScheduleHigh)
+ return true;
+ if (!lhs->isScheduleHigh && rhs->isScheduleHigh)
+ return false;
+
+ if (lhs->getHeight() > rhs->getHeight())
+ return true;
+ else if (lhs->getHeight() < rhs->getHeight())
+ return false;
+
+ return (lhs->NodeNum < rhs->NodeNum);
+ }
+ };
+ // A set of SUs with a sorter and dump method.
+ struct SUSet : std::set<SUnit*, SUSorter> {
+ #ifndef NDEBUG
+ void dump(SystemZHazardRecognizer &HazardRec) const;
+ #endif
+ };
+
+ /// The set of available SUs to schedule next.
+ SUSet Available;
+
+ // HazardRecognizer that tracks the scheduler state for the current
+ // region.
+ SystemZHazardRecognizer HazardRec;
+
+public:
+ SystemZPostRASchedStrategy(const MachineSchedContext *C);
+
+ /// PostRA scheduling does not track pressure.
+ bool shouldTrackPressure() const override { return false; }
+
+ /// Initialize the strategy after building the DAG for a new region.
+ void initialize(ScheduleDAGMI *dag) override;
+
+ /// Pick the next node to schedule, or return NULL.
+ SUnit *pickNode(bool &IsTopNode) override;
+
+ /// ScheduleDAGMI has scheduled an instruction - tell HazardRec
+ /// about it.
+ void schedNode(SUnit *SU, bool IsTopNode) override;
+
+ /// SU has had all predecessor dependencies resolved. Put it into
+ /// Available.
+ void releaseTopNode(SUnit *SU) override;
+
+ /// Currently only scheduling top-down, so this method is empty.
+  void releaseBottomNode(SUnit *SU) override {}
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINESCHEDULER_H
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td b/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td
new file mode 100644
index 000000000000..713612129d90
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td
@@ -0,0 +1,595 @@
+//===-- SystemZOperands.td - SystemZ instruction operands ----*- tblgen-*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Class definitions
+//===----------------------------------------------------------------------===//
+
+class ImmediateAsmOperand<string name>
+ : AsmOperandClass {
+ let Name = name;
+ let RenderMethod = "addImmOperands";
+}
+class ImmediateTLSAsmOperand<string name>
+ : AsmOperandClass {
+ let Name = name;
+ let RenderMethod = "addImmTLSOperands";
+}
+
+// Constructs both a DAG pattern and instruction operand for an immediate
+// of type VT. PRED returns true if a node is acceptable and XFORM returns
+// the operand value associated with the node. ASMOP is the name of the
+// associated asm operand, and also forms the basis of the asm print method.
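+//
+// For example, imm32zx8 below combines an isUInt<8> predicate with the
+// UIMM8 transform and the U8Imm asm operand.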
+class Immediate<ValueType vt, code pred, SDNodeXForm xform, string asmop>
+ : PatLeaf<(vt imm), pred, xform>, Operand<vt> {
+ let PrintMethod = "print"##asmop##"Operand";
+ let DecoderMethod = "decode"##asmop##"Operand";
+ let ParserMatchClass = !cast<AsmOperandClass>(asmop);
+}
+
+// Constructs an asm operand for a PC-relative address. SIZE says how
+// many bits there are.
+class PCRelAsmOperand<string size> : ImmediateAsmOperand<"PCRel"##size> {
+ let PredicateMethod = "isImm";
+ let ParserMethod = "parsePCRel"##size;
+}
+class PCRelTLSAsmOperand<string size>
+ : ImmediateTLSAsmOperand<"PCRelTLS"##size> {
+ let PredicateMethod = "isImmTLS";
+ let ParserMethod = "parsePCRelTLS"##size;
+}
+
+// Constructs an operand for a PC-relative address with address type VT.
+// ASMOP is the associated asm operand.
+class PCRelOperand<ValueType vt, AsmOperandClass asmop> : Operand<vt> {
+ let PrintMethod = "printPCRelOperand";
+ let ParserMatchClass = asmop;
+}
+class PCRelTLSOperand<ValueType vt, AsmOperandClass asmop> : Operand<vt> {
+ let PrintMethod = "printPCRelTLSOperand";
+ let ParserMatchClass = asmop;
+}
+
+// Constructs both a DAG pattern and instruction operand for a PC-relative
+// address with address size VT. SELF is the name of the operand and
+// ASMOP is the associated asm operand.
+class PCRelAddress<ValueType vt, string self, AsmOperandClass asmop>
+ : ComplexPattern<vt, 1, "selectPCRelAddress",
+ [z_pcrel_wrapper, z_pcrel_offset]>,
+ PCRelOperand<vt, asmop> {
+ let MIOperandInfo = (ops !cast<Operand>(self));
+}
+
+// Constructs an AsmOperandClass for addressing mode FORMAT, treating the
+// registers as having BITSIZE bits and displacements as having DISPSIZE bits.
+// LENGTH is "LenN" for addresses with an N-bit length field, otherwise it
+// is "".
+class AddressAsmOperand<string format, string bitsize, string dispsize,
+ string length = "">
+ : AsmOperandClass {
+ let Name = format##bitsize##"Disp"##dispsize##length;
+ let ParserMethod = "parse"##format##bitsize;
+ let RenderMethod = "add"##format##"Operands";
+}
+
+// Constructs an instruction operand for an addressing mode. FORMAT,
+// BITSIZE, DISPSIZE and LENGTH are the parameters to an associated
+// AddressAsmOperand. OPERANDS is a list of individual operands
+// (base register, displacement, etc.).
+class AddressOperand<string bitsize, string dispsize, string length,
+ string format, dag operands>
+ : Operand<!cast<ValueType>("i"##bitsize)> {
+ let PrintMethod = "print"##format##"Operand";
+ let EncoderMethod = "get"##format##dispsize##length##"Encoding";
+ let DecoderMethod =
+ "decode"##format##bitsize##"Disp"##dispsize##length##"Operand";
+ let MIOperandInfo = operands;
+ let ParserMatchClass =
+ !cast<AddressAsmOperand>(format##bitsize##"Disp"##dispsize##length);
+}
+
+// Constructs both a DAG pattern and instruction operand for an addressing mode.
+// FORMAT, BITSIZE, DISPSIZE and LENGTH are the parameters to an associated
+// AddressAsmOperand. OPERANDS is a list of NUMOPS individual operands
+// (base register, displacement, etc.). SELTYPE is the type of the memory
+// operand for selection purposes; sometimes we want different selection
+// choices for the same underlying addressing mode. SUFFIX is similarly
+// a suffix appended to the displacement for selection purposes;
+// e.g. we want to reject small 20-bit displacements if a 12-bit form
+// also exists, but we want to accept them otherwise.
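+//
+// For example, the bdaddr12pair and bdaddr20pair modes defined below are
+// the 12-bit and 20-bit displacement forms of the same base + displacement
+// addressing mode, used where instructions with both ranges exist.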
+class AddressingMode<string seltype, string bitsize, string dispsize,
+ string suffix, string length, int numops, string format,
+ dag operands>
+ : ComplexPattern<!cast<ValueType>("i"##bitsize), numops,
+ "select"##seltype##dispsize##suffix##length,
+ [add, sub, or, frameindex, z_adjdynalloc]>,
+ AddressOperand<bitsize, dispsize, length, format, operands>;
+
+// An addressing mode with a base and displacement but no index.
+class BDMode<string type, string bitsize, string dispsize, string suffix>
+ : AddressingMode<type, bitsize, dispsize, suffix, "", 2, "BDAddr",
+ (ops !cast<RegisterOperand>("ADDR"##bitsize),
+ !cast<Immediate>("disp"##dispsize##"imm"##bitsize))>;
+
+// An addressing mode with a base, displacement and index.
+class BDXMode<string type, string bitsize, string dispsize, string suffix>
+ : AddressingMode<type, bitsize, dispsize, suffix, "", 3, "BDXAddr",
+ (ops !cast<RegisterOperand>("ADDR"##bitsize),
+ !cast<Immediate>("disp"##dispsize##"imm"##bitsize),
+ !cast<RegisterOperand>("ADDR"##bitsize))>;
+
+// A BDMode paired with an immediate length operand of LENSIZE bits.
+class BDLMode<string type, string bitsize, string dispsize, string suffix,
+ string lensize>
+ : AddressingMode<type, bitsize, dispsize, suffix, "Len"##lensize, 3,
+ "BDLAddr",
+ (ops !cast<RegisterOperand>("ADDR"##bitsize),
+ !cast<Immediate>("disp"##dispsize##"imm"##bitsize),
+ !cast<Immediate>("imm"##bitsize))>;
+
+// A BDMode paired with a register length operand.
+class BDRMode<string type, string bitsize, string dispsize, string suffix>
+ : AddressingMode<type, bitsize, dispsize, suffix, "", 3, "BDRAddr",
+ (ops !cast<RegisterOperand>("ADDR"##bitsize),
+ !cast<Immediate>("disp"##dispsize##"imm"##bitsize),
+ !cast<RegisterOperand>("GR"##bitsize))>;
+
+// An addressing mode with a base, displacement and a vector index.
+class BDVMode<string bitsize, string dispsize>
+ : AddressOperand<bitsize, dispsize, "", "BDVAddr",
+ (ops !cast<RegisterOperand>("ADDR"##bitsize),
+ !cast<Immediate>("disp"##dispsize##"imm"##bitsize),
+ !cast<RegisterOperand>("VR128"))>;
+
+//===----------------------------------------------------------------------===//
+// Extracting immediate operands from nodes
+// These all create MVT::i64 nodes to ensure the value is not sign-extended
+// when converted from an SDNode to a MachineOperand later on.
+//===----------------------------------------------------------------------===//
+
+// Bits 0-15 (counting from the lsb).
+def LL16 : SDNodeXForm<imm, [{
+ uint64_t Value = N->getZExtValue() & 0x000000000000FFFFULL;
+ return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64);
+}]>;
+
+// Bits 16-31 (counting from the lsb).
+def LH16 : SDNodeXForm<imm, [{
+ uint64_t Value = (N->getZExtValue() & 0x00000000FFFF0000ULL) >> 16;
+ return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64);
+}]>;
+
+// Bits 32-47 (counting from the lsb).
+def HL16 : SDNodeXForm<imm, [{
+ uint64_t Value = (N->getZExtValue() & 0x0000FFFF00000000ULL) >> 32;
+ return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64);
+}]>;
+
+// Bits 48-63 (counting from the lsb).
+def HH16 : SDNodeXForm<imm, [{
+ uint64_t Value = (N->getZExtValue() & 0xFFFF000000000000ULL) >> 48;
+ return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64);
+}]>;
+
+// Low 32 bits.
+def LF32 : SDNodeXForm<imm, [{
+ uint64_t Value = N->getZExtValue() & 0x00000000FFFFFFFFULL;
+ return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64);
+}]>;
+
+// High 32 bits.
+def HF32 : SDNodeXForm<imm, [{
+ uint64_t Value = N->getZExtValue() >> 32;
+ return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64);
+}]>;
+
+// Truncate an immediate to an 8-bit signed quantity.
+def SIMM8 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(int8_t(N->getZExtValue()), SDLoc(N),
+ MVT::i64);
+}]>;
+
+// Truncate an immediate to an 8-bit unsigned quantity.
+def UIMM8 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(uint8_t(N->getZExtValue()), SDLoc(N),
+ MVT::i64);
+}]>;
+
+// Truncate an immediate to an 8-bit unsigned quantity and mask off the low bit.
+def UIMM8EVEN : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 0xfe, SDLoc(N),
+ MVT::i64);
+}]>;
+
+// Truncate an immediate to a 12-bit unsigned quantity.
+def UIMM12 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 0xfff, SDLoc(N),
+ MVT::i64);
+}]>;
+
+// Truncate an immediate to a 16-bit signed quantity.
+def SIMM16 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(int16_t(N->getZExtValue()), SDLoc(N),
+ MVT::i64);
+}]>;
+
+// Truncate an immediate to a 16-bit unsigned quantity.
+def UIMM16 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(uint16_t(N->getZExtValue()), SDLoc(N),
+ MVT::i64);
+}]>;
+
+// Truncate an immediate to a 32-bit signed quantity.
+def SIMM32 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(int32_t(N->getZExtValue()), SDLoc(N),
+ MVT::i64);
+}]>;
+
+// Truncate an immediate to a 32-bit unsigned quantity.
+def UIMM32 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(uint32_t(N->getZExtValue()), SDLoc(N),
+ MVT::i64);
+}]>;
+
+// Truncate an immediate to a 48-bit unsigned quantity.
+def UIMM48 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(uint64_t(N->getZExtValue()) & 0xffffffffffff,
+ SDLoc(N), MVT::i64);
+}]>;
+
+// Negate and then truncate an immediate to a 32-bit unsigned quantity.
+def NEGIMM32 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(uint32_t(-N->getZExtValue()), SDLoc(N),
+ MVT::i64);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Immediate asm operands.
+//===----------------------------------------------------------------------===//
+
+def U1Imm : ImmediateAsmOperand<"U1Imm">;
+def U2Imm : ImmediateAsmOperand<"U2Imm">;
+def U3Imm : ImmediateAsmOperand<"U3Imm">;
+def U4Imm : ImmediateAsmOperand<"U4Imm">;
+def U6Imm : ImmediateAsmOperand<"U6Imm">;
+def S8Imm : ImmediateAsmOperand<"S8Imm">;
+def U8Imm : ImmediateAsmOperand<"U8Imm">;
+def U12Imm : ImmediateAsmOperand<"U12Imm">;
+def S16Imm : ImmediateAsmOperand<"S16Imm">;
+def U16Imm : ImmediateAsmOperand<"U16Imm">;
+def S32Imm : ImmediateAsmOperand<"S32Imm">;
+def U32Imm : ImmediateAsmOperand<"U32Imm">;
+def U48Imm : ImmediateAsmOperand<"U48Imm">;
+
+//===----------------------------------------------------------------------===//
+// i32 immediates
+//===----------------------------------------------------------------------===//
+
+// Immediates for the lower and upper 16 bits of an i32, with the other
+// bits of the i32 being zero.
+def imm32ll16 : Immediate<i32, [{
+ return SystemZ::isImmLL(N->getZExtValue());
+}], LL16, "U16Imm">;
+
+def imm32lh16 : Immediate<i32, [{
+ return SystemZ::isImmLH(N->getZExtValue());
+}], LH16, "U16Imm">;
+
+// Immediates for the lower and upper 16 bits of an i32, with the other
+// bits of the i32 being one.
+def imm32ll16c : Immediate<i32, [{
+ return SystemZ::isImmLL(uint32_t(~N->getZExtValue()));
+}], LL16, "U16Imm">;
+
+def imm32lh16c : Immediate<i32, [{
+ return SystemZ::isImmLH(uint32_t(~N->getZExtValue()));
+}], LH16, "U16Imm">;
+
+// Short immediates
+def imm32zx1 : Immediate<i32, [{
+ return isUInt<1>(N->getZExtValue());
+}], NOOP_SDNodeXForm, "U1Imm">;
+
+def imm32zx2 : Immediate<i32, [{
+ return isUInt<2>(N->getZExtValue());
+}], NOOP_SDNodeXForm, "U2Imm">;
+
+def imm32zx3 : Immediate<i32, [{
+ return isUInt<3>(N->getZExtValue());
+}], NOOP_SDNodeXForm, "U3Imm">;
+
+def imm32zx4 : Immediate<i32, [{
+ return isUInt<4>(N->getZExtValue());
+}], NOOP_SDNodeXForm, "U4Imm">;
+
+// Note: this enforces an even value during code generation only.
+// When used from the assembler, any 4-bit value is allowed.
+def imm32zx4even : Immediate<i32, [{
+ return isUInt<4>(N->getZExtValue());
+}], UIMM8EVEN, "U4Imm">;
+
+def imm32zx6 : Immediate<i32, [{
+ return isUInt<6>(N->getZExtValue());
+}], NOOP_SDNodeXForm, "U6Imm">;
+
+def imm32sx8 : Immediate<i32, [{
+ return isInt<8>(N->getSExtValue());
+}], SIMM8, "S8Imm">;
+
+def imm32zx8 : Immediate<i32, [{
+ return isUInt<8>(N->getZExtValue());
+}], UIMM8, "U8Imm">;
+
+def imm32zx8trunc : Immediate<i32, [{}], UIMM8, "U8Imm">;
+
+def imm32zx12 : Immediate<i32, [{
+ return isUInt<12>(N->getZExtValue());
+}], UIMM12, "U12Imm">;
+
+def imm32sx16 : Immediate<i32, [{
+ return isInt<16>(N->getSExtValue());
+}], SIMM16, "S16Imm">;
+
+def imm32zx16 : Immediate<i32, [{
+ return isUInt<16>(N->getZExtValue());
+}], UIMM16, "U16Imm">;
+
+def imm32sx16trunc : Immediate<i32, [{}], SIMM16, "S16Imm">;
+
+// Full 32-bit immediates. We need both signed and unsigned versions
+// because the assembler is picky. E.g. AFI requires signed operands
+// while NILF requires unsigned ones.
+def simm32 : Immediate<i32, [{}], SIMM32, "S32Imm">;
+def uimm32 : Immediate<i32, [{}], UIMM32, "U32Imm">;
+
+def imm32 : ImmLeaf<i32, [{}]>;
+
+//===----------------------------------------------------------------------===//
+// 64-bit immediates
+//===----------------------------------------------------------------------===//
+
+// Immediates for 16-bit chunks of an i64, with the other bits of the
+// i64 being zero.
+def imm64ll16 : Immediate<i64, [{
+ return SystemZ::isImmLL(N->getZExtValue());
+}], LL16, "U16Imm">;
+
+def imm64lh16 : Immediate<i64, [{
+ return SystemZ::isImmLH(N->getZExtValue());
+}], LH16, "U16Imm">;
+
+def imm64hl16 : Immediate<i64, [{
+ return SystemZ::isImmHL(N->getZExtValue());
+}], HL16, "U16Imm">;
+
+def imm64hh16 : Immediate<i64, [{
+ return SystemZ::isImmHH(N->getZExtValue());
+}], HH16, "U16Imm">;
+
+// Immediates for 16-bit chunks of an i64, with the other bits of the
+// i64 being one.
+def imm64ll16c : Immediate<i64, [{
+ return SystemZ::isImmLL(uint64_t(~N->getZExtValue()));
+}], LL16, "U16Imm">;
+
+def imm64lh16c : Immediate<i64, [{
+ return SystemZ::isImmLH(uint64_t(~N->getZExtValue()));
+}], LH16, "U16Imm">;
+
+def imm64hl16c : Immediate<i64, [{
+ return SystemZ::isImmHL(uint64_t(~N->getZExtValue()));
+}], HL16, "U16Imm">;
+
+def imm64hh16c : Immediate<i64, [{
+ return SystemZ::isImmHH(uint64_t(~N->getZExtValue()));
+}], HH16, "U16Imm">;
+
+// Immediates for the lower and upper 32 bits of an i64, with the other
+// bits of the i64 being zero.
+def imm64lf32 : Immediate<i64, [{
+ return SystemZ::isImmLF(N->getZExtValue());
+}], LF32, "U32Imm">;
+
+def imm64hf32 : Immediate<i64, [{
+ return SystemZ::isImmHF(N->getZExtValue());
+}], HF32, "U32Imm">;
+
+// Immediates for the lower and upper 32 bits of an i64, with the other
+// bits of the i64 being one.
+def imm64lf32c : Immediate<i64, [{
+ return SystemZ::isImmLF(uint64_t(~N->getZExtValue()));
+}], LF32, "U32Imm">;
+
+def imm64hf32c : Immediate<i64, [{
+ return SystemZ::isImmHF(uint64_t(~N->getZExtValue()));
+}], HF32, "U32Imm">;
+
+// Short immediates.
+def imm64sx8 : Immediate<i64, [{
+ return isInt<8>(N->getSExtValue());
+}], SIMM8, "S8Imm">;
+
+def imm64zx8 : Immediate<i64, [{
+ return isUInt<8>(N->getSExtValue());
+}], UIMM8, "U8Imm">;
+
+def imm64sx16 : Immediate<i64, [{
+ return isInt<16>(N->getSExtValue());
+}], SIMM16, "S16Imm">;
+
+def imm64zx16 : Immediate<i64, [{
+ return isUInt<16>(N->getZExtValue());
+}], UIMM16, "U16Imm">;
+
+def imm64sx32 : Immediate<i64, [{
+ return isInt<32>(N->getSExtValue());
+}], SIMM32, "S32Imm">;
+
+def imm64zx32 : Immediate<i64, [{
+ return isUInt<32>(N->getZExtValue());
+}], UIMM32, "U32Imm">;
+
+def imm64zx32n : Immediate<i64, [{
+ return isUInt<32>(-N->getSExtValue());
+}], NEGIMM32, "U32Imm">;
+
+def imm64zx48 : Immediate<i64, [{
+  return isUInt<48>(N->getZExtValue());
+}], UIMM48, "U48Imm">;
+
+def imm64 : ImmLeaf<i64, [{}]>, Operand<i64>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point immediates
+//===----------------------------------------------------------------------===//
+
+// Floating-point zero.
+def fpimm0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(+0.0); }]>;
+
+// Floating point negative zero.
+def fpimmneg0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(-0.0); }]>;
+
+//===----------------------------------------------------------------------===//
+// Symbolic address operands
+//===----------------------------------------------------------------------===//
+
+// PC-relative asm operands.
+def PCRel12 : PCRelAsmOperand<"12">;
+def PCRel16 : PCRelAsmOperand<"16">;
+def PCRel24 : PCRelAsmOperand<"24">;
+def PCRel32 : PCRelAsmOperand<"32">;
+def PCRelTLS16 : PCRelTLSAsmOperand<"16">;
+def PCRelTLS32 : PCRelTLSAsmOperand<"32">;
+
+// PC-relative offsets of a basic block. The offset is sign-extended
+// and multiplied by 2.
+def brtarget16 : PCRelOperand<OtherVT, PCRel16> {
+ let EncoderMethod = "getPC16DBLEncoding";
+ let DecoderMethod = "decodePC16DBLBranchOperand";
+}
+def brtarget32 : PCRelOperand<OtherVT, PCRel32> {
+ let EncoderMethod = "getPC32DBLEncoding";
+ let DecoderMethod = "decodePC32DBLBranchOperand";
+}
+
+// Variants of brtarget for use with branch prediction preload.
+def brtarget12bpp : PCRelOperand<OtherVT, PCRel12> {
+ let EncoderMethod = "getPC12DBLBPPEncoding";
+ let DecoderMethod = "decodePC12DBLBranchOperand";
+}
+def brtarget16bpp : PCRelOperand<OtherVT, PCRel16> {
+ let EncoderMethod = "getPC16DBLBPPEncoding";
+ let DecoderMethod = "decodePC16DBLBranchOperand";
+}
+def brtarget24bpp : PCRelOperand<OtherVT, PCRel24> {
+ let EncoderMethod = "getPC24DBLBPPEncoding";
+ let DecoderMethod = "decodePC24DBLBranchOperand";
+}
+
+// Variants of brtarget16/32 with an optional additional TLS symbol.
+// These are used to annotate calls to __tls_get_offset.
+def tlssym : Operand<i64> { }
+def brtarget16tls : PCRelTLSOperand<OtherVT, PCRelTLS16> {
+ let MIOperandInfo = (ops brtarget16:$func, tlssym:$sym);
+ let EncoderMethod = "getPC16DBLTLSEncoding";
+ let DecoderMethod = "decodePC16DBLBranchOperand";
+}
+def brtarget32tls : PCRelTLSOperand<OtherVT, PCRelTLS32> {
+ let MIOperandInfo = (ops brtarget32:$func, tlssym:$sym);
+ let EncoderMethod = "getPC32DBLTLSEncoding";
+ let DecoderMethod = "decodePC32DBLBranchOperand";
+}
+
+// A PC-relative offset of a global value. The offset is sign-extended
+// and multiplied by 2.
+def pcrel32 : PCRelAddress<i64, "pcrel32", PCRel32> {
+ let EncoderMethod = "getPC32DBLEncoding";
+ let DecoderMethod = "decodePC32DBLOperand";
+}
+
+//===----------------------------------------------------------------------===//
+// Addressing modes
+//===----------------------------------------------------------------------===//
+
+// 12-bit displacement operands.
+def disp12imm32 : Operand<i32>;
+def disp12imm64 : Operand<i64>;
+
+// 20-bit displacement operands.
+def disp20imm32 : Operand<i32>;
+def disp20imm64 : Operand<i64>;
+
+def BDAddr32Disp12 : AddressAsmOperand<"BDAddr", "32", "12">;
+def BDAddr32Disp20 : AddressAsmOperand<"BDAddr", "32", "20">;
+def BDAddr64Disp12 : AddressAsmOperand<"BDAddr", "64", "12">;
+def BDAddr64Disp20 : AddressAsmOperand<"BDAddr", "64", "20">;
+def BDXAddr64Disp12 : AddressAsmOperand<"BDXAddr", "64", "12">;
+def BDXAddr64Disp20 : AddressAsmOperand<"BDXAddr", "64", "20">;
+def BDLAddr64Disp12Len4 : AddressAsmOperand<"BDLAddr", "64", "12", "Len4">;
+def BDLAddr64Disp12Len8 : AddressAsmOperand<"BDLAddr", "64", "12", "Len8">;
+def BDRAddr64Disp12 : AddressAsmOperand<"BDRAddr", "64", "12">;
+def BDVAddr64Disp12 : AddressAsmOperand<"BDVAddr", "64", "12">;
+
+// DAG patterns and operands for addressing modes. Each mode has
+// the form <type><range><group>[<len>] where:
+//
+// <type> is one of:
+// shift : base + displacement (32-bit)
+// bdaddr : base + displacement
+// mviaddr : like bdaddr, but reject cases with a natural index
+// bdxaddr : base + displacement + index
+// laaddr : like bdxaddr, but used for Load Address operations
+// dynalloc : base + displacement + index + ADJDYNALLOC
+// bdladdr : base + displacement with a length field
+// bdvaddr : base + displacement with a vector index
+//
+// <range> is one of:
+// 12 : the displacement is an unsigned 12-bit value
+// 20 : the displacement is a signed 20-bit value
+//
+// <group> is one of:
+// pair : used when there is an equivalent instruction with the opposite
+// range value (12 or 20)
+// only : used when there is no equivalent instruction with the opposite
+// range value
+//
+// <len> is one of:
+//
+//   <empty> : there is no length field.
+//   len4    : the length field is 4 bits, with a range of [1, 0x10].
+//   len8    : the length field is 8 bits, with a range of [1, 0x100].
+def shift12only : BDMode <"BDAddr", "32", "12", "Only">;
+def shift20only : BDMode <"BDAddr", "32", "20", "Only">;
+def bdaddr12only : BDMode <"BDAddr", "64", "12", "Only">;
+def bdaddr12pair : BDMode <"BDAddr", "64", "12", "Pair">;
+def bdaddr20only : BDMode <"BDAddr", "64", "20", "Only">;
+def bdaddr20pair : BDMode <"BDAddr", "64", "20", "Pair">;
+def mviaddr12pair : BDMode <"MVIAddr", "64", "12", "Pair">;
+def mviaddr20pair : BDMode <"MVIAddr", "64", "20", "Pair">;
+def bdxaddr12only : BDXMode<"BDXAddr", "64", "12", "Only">;
+def bdxaddr12pair : BDXMode<"BDXAddr", "64", "12", "Pair">;
+def bdxaddr20only : BDXMode<"BDXAddr", "64", "20", "Only">;
+def bdxaddr20only128 : BDXMode<"BDXAddr", "64", "20", "Only128">;
+def bdxaddr20pair : BDXMode<"BDXAddr", "64", "20", "Pair">;
+def dynalloc12only : BDXMode<"DynAlloc", "64", "12", "Only">;
+def laaddr12pair : BDXMode<"LAAddr", "64", "12", "Pair">;
+def laaddr20pair : BDXMode<"LAAddr", "64", "20", "Pair">;
+def bdladdr12onlylen4 : BDLMode<"BDLAddr", "64", "12", "Only", "4">;
+def bdladdr12onlylen8 : BDLMode<"BDLAddr", "64", "12", "Only", "8">;
+def bdraddr12only : BDRMode<"BDRAddr", "64", "12", "Only">;
+def bdvaddr12only : BDVMode< "64", "12">;
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous
+//===----------------------------------------------------------------------===//
+
+// A 4-bit condition-code mask.
+def cond4 : PatLeaf<(i32 imm), [{ return (N->getZExtValue() < 16); }]>,
+ Operand<i32> {
+ let PrintMethod = "printCond4Operand";
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td b/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td
new file mode 100644
index 000000000000..ab2392809f3b
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -0,0 +1,683 @@
+//===-- SystemZOperators.td - SystemZ-specific operators ------*- tblgen-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Type profiles
+//===----------------------------------------------------------------------===//
+def SDT_CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i64>,
+ SDTCisVT<1, i64>]>;
+def SDT_CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i64>,
+ SDTCisVT<1, i64>]>;
+def SDT_ZCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
+def SDT_ZCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
+def SDT_ZICmp : SDTypeProfile<0, 3,
+ [SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>]>;
+def SDT_ZBRCCMask : SDTypeProfile<0, 3,
+ [SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>,
+ SDTCisVT<2, OtherVT>]>;
+def SDT_ZSelectCCMask : SDTypeProfile<1, 4,
+ [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisVT<3, i32>,
+ SDTCisVT<4, i32>]>;
+def SDT_ZWrapPtr : SDTypeProfile<1, 1,
+ [SDTCisSameAs<0, 1>,
+ SDTCisPtrTy<0>]>;
+def SDT_ZWrapOffset : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisPtrTy<0>]>;
+def SDT_ZAdjDynAlloc : SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>;
+def SDT_ZGR128Binary32 : SDTypeProfile<1, 2,
+ [SDTCisVT<0, untyped>,
+ SDTCisVT<1, untyped>,
+ SDTCisVT<2, i32>]>;
+def SDT_ZGR128Binary64 : SDTypeProfile<1, 2,
+ [SDTCisVT<0, untyped>,
+ SDTCisVT<1, untyped>,
+ SDTCisVT<2, i64>]>;
+def SDT_ZAtomicLoadBinaryW : SDTypeProfile<1, 5,
+ [SDTCisVT<0, i32>,
+ SDTCisPtrTy<1>,
+ SDTCisVT<2, i32>,
+ SDTCisVT<3, i32>,
+ SDTCisVT<4, i32>,
+ SDTCisVT<5, i32>]>;
+def SDT_ZAtomicCmpSwapW : SDTypeProfile<1, 6,
+ [SDTCisVT<0, i32>,
+ SDTCisPtrTy<1>,
+ SDTCisVT<2, i32>,
+ SDTCisVT<3, i32>,
+ SDTCisVT<4, i32>,
+ SDTCisVT<5, i32>,
+ SDTCisVT<6, i32>]>;
+def SDT_ZMemMemLength : SDTypeProfile<0, 3,
+ [SDTCisPtrTy<0>,
+ SDTCisPtrTy<1>,
+ SDTCisVT<2, i64>]>;
+def SDT_ZMemMemLoop : SDTypeProfile<0, 4,
+ [SDTCisPtrTy<0>,
+ SDTCisPtrTy<1>,
+ SDTCisVT<2, i64>,
+ SDTCisVT<3, i64>]>;
+def SDT_ZString : SDTypeProfile<1, 3,
+ [SDTCisPtrTy<0>,
+ SDTCisPtrTy<1>,
+ SDTCisPtrTy<2>,
+ SDTCisVT<3, i32>]>;
+def SDT_ZI32Intrinsic : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>;
+def SDT_ZPrefetch : SDTypeProfile<0, 2,
+ [SDTCisVT<0, i32>,
+ SDTCisPtrTy<1>]>;
+def SDT_ZLoadBSwap : SDTypeProfile<1, 2,
+ [SDTCisInt<0>,
+ SDTCisPtrTy<1>,
+ SDTCisVT<2, OtherVT>]>;
+def SDT_ZStoreBSwap : SDTypeProfile<0, 3,
+ [SDTCisInt<0>,
+ SDTCisPtrTy<1>,
+ SDTCisVT<2, OtherVT>]>;
+def SDT_ZTBegin : SDTypeProfile<0, 2,
+ [SDTCisPtrTy<0>,
+ SDTCisVT<1, i32>]>;
+def SDT_ZInsertVectorElt : SDTypeProfile<1, 3,
+ [SDTCisVec<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisVT<3, i32>]>;
+def SDT_ZExtractVectorElt : SDTypeProfile<1, 2,
+ [SDTCisVec<1>,
+ SDTCisVT<2, i32>]>;
+def SDT_ZReplicate : SDTypeProfile<1, 1,
+ [SDTCisVec<0>]>;
+def SDT_ZVecUnaryConv : SDTypeProfile<1, 1,
+ [SDTCisVec<0>,
+ SDTCisVec<1>]>;
+def SDT_ZVecUnary : SDTypeProfile<1, 1,
+ [SDTCisVec<0>,
+ SDTCisSameAs<0, 1>]>;
+def SDT_ZVecBinary : SDTypeProfile<1, 2,
+ [SDTCisVec<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>]>;
+def SDT_ZVecBinaryInt : SDTypeProfile<1, 2,
+ [SDTCisVec<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>]>;
+def SDT_ZVecBinaryConv : SDTypeProfile<1, 2,
+ [SDTCisVec<0>,
+ SDTCisVec<1>,
+ SDTCisSameAs<1, 2>]>;
+def SDT_ZVecBinaryConvInt : SDTypeProfile<1, 2,
+ [SDTCisVec<0>,
+ SDTCisVec<1>,
+ SDTCisVT<2, i32>]>;
+def SDT_ZRotateMask : SDTypeProfile<1, 2,
+ [SDTCisVec<0>,
+ SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>]>;
+def SDT_ZJoinDwords : SDTypeProfile<1, 2,
+ [SDTCisVT<0, v2i64>,
+ SDTCisVT<1, i64>,
+ SDTCisVT<2, i64>]>;
+def SDT_ZVecTernary : SDTypeProfile<1, 3,
+ [SDTCisVec<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>]>;
+def SDT_ZVecTernaryInt : SDTypeProfile<1, 3,
+ [SDTCisVec<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisVT<3, i32>]>;
+def SDT_ZVecQuaternaryInt : SDTypeProfile<1, 4,
+ [SDTCisVec<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisVT<4, i32>]>;
+def SDT_ZTest : SDTypeProfile<0, 2, [SDTCisVT<1, i64>]>;
+
+//===----------------------------------------------------------------------===//
+// Node definitions
+//===----------------------------------------------------------------------===//
+
+// These are target-independent nodes, but have target-specific formats.
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_CallSeqStart,
+ [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_CallSeqEnd,
+ [SDNPHasChain, SDNPSideEffect, SDNPOptInGlue,
+ SDNPOutGlue]>;
+def global_offset_table : SDNode<"ISD::GLOBAL_OFFSET_TABLE", SDTPtrLeaf>;
+
+// Nodes for SystemZISD::*. See SystemZISelLowering.h for more details.
+def z_retflag : SDNode<"SystemZISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def z_call : SDNode<"SystemZISD::CALL", SDT_ZCall,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
+ SDNPVariadic]>;
+def z_sibcall : SDNode<"SystemZISD::SIBCALL", SDT_ZCall,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
+ SDNPVariadic]>;
+def z_tls_gdcall : SDNode<"SystemZISD::TLS_GDCALL", SDT_ZCall,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def z_tls_ldcall : SDNode<"SystemZISD::TLS_LDCALL", SDT_ZCall,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def z_pcrel_wrapper : SDNode<"SystemZISD::PCREL_WRAPPER", SDT_ZWrapPtr, []>;
+def z_pcrel_offset : SDNode<"SystemZISD::PCREL_OFFSET",
+ SDT_ZWrapOffset, []>;
+def z_iabs : SDNode<"SystemZISD::IABS", SDTIntUnaryOp, []>;
+def z_icmp : SDNode<"SystemZISD::ICMP", SDT_ZICmp, [SDNPOutGlue]>;
+def z_fcmp : SDNode<"SystemZISD::FCMP", SDT_ZCmp, [SDNPOutGlue]>;
+def z_tm : SDNode<"SystemZISD::TM", SDT_ZICmp, [SDNPOutGlue]>;
+def z_br_ccmask : SDNode<"SystemZISD::BR_CCMASK", SDT_ZBRCCMask,
+ [SDNPHasChain, SDNPInGlue]>;
+def z_select_ccmask : SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask,
+ [SDNPInGlue]>;
+def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>;
+def z_popcnt : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>;
+def z_umul_lohi64 : SDNode<"SystemZISD::UMUL_LOHI64", SDT_ZGR128Binary64>;
+def z_sdivrem32 : SDNode<"SystemZISD::SDIVREM32", SDT_ZGR128Binary32>;
+def z_sdivrem64 : SDNode<"SystemZISD::SDIVREM64", SDT_ZGR128Binary64>;
+def z_udivrem32 : SDNode<"SystemZISD::UDIVREM32", SDT_ZGR128Binary32>;
+def z_udivrem64 : SDNode<"SystemZISD::UDIVREM64", SDT_ZGR128Binary64>;
+
+def z_membarrier : SDNode<"SystemZISD::MEMBARRIER", SDTNone,
+ [SDNPHasChain, SDNPSideEffect]>;
+
+def z_loadbswap : SDNode<"SystemZISD::LRV", SDT_ZLoadBSwap,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def z_storebswap : SDNode<"SystemZISD::STRV", SDT_ZStoreBSwap,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+def z_tdc : SDNode<"SystemZISD::TDC", SDT_ZTest, [SDNPOutGlue]>;
+
+// Defined because the index is an i32 rather than a pointer.
+def z_vector_insert : SDNode<"ISD::INSERT_VECTOR_ELT",
+ SDT_ZInsertVectorElt>;
+def z_vector_extract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
+ SDT_ZExtractVectorElt>;
+def z_byte_mask : SDNode<"SystemZISD::BYTE_MASK", SDT_ZReplicate>;
+def z_rotate_mask : SDNode<"SystemZISD::ROTATE_MASK", SDT_ZRotateMask>;
+def z_replicate : SDNode<"SystemZISD::REPLICATE", SDT_ZReplicate>;
+def z_join_dwords : SDNode<"SystemZISD::JOIN_DWORDS", SDT_ZJoinDwords>;
+def z_splat : SDNode<"SystemZISD::SPLAT", SDT_ZVecBinaryInt>;
+def z_merge_high : SDNode<"SystemZISD::MERGE_HIGH", SDT_ZVecBinary>;
+def z_merge_low : SDNode<"SystemZISD::MERGE_LOW", SDT_ZVecBinary>;
+def z_shl_double : SDNode<"SystemZISD::SHL_DOUBLE", SDT_ZVecTernaryInt>;
+def z_permute_dwords : SDNode<"SystemZISD::PERMUTE_DWORDS",
+ SDT_ZVecTernaryInt>;
+def z_permute : SDNode<"SystemZISD::PERMUTE", SDT_ZVecTernary>;
+def z_pack : SDNode<"SystemZISD::PACK", SDT_ZVecBinaryConv>;
+def z_packs_cc : SDNode<"SystemZISD::PACKS_CC", SDT_ZVecBinaryConv,
+ [SDNPOutGlue]>;
+def z_packls_cc : SDNode<"SystemZISD::PACKLS_CC", SDT_ZVecBinaryConv,
+ [SDNPOutGlue]>;
+def z_unpack_high : SDNode<"SystemZISD::UNPACK_HIGH", SDT_ZVecUnaryConv>;
+def z_unpackl_high : SDNode<"SystemZISD::UNPACKL_HIGH", SDT_ZVecUnaryConv>;
+def z_unpack_low : SDNode<"SystemZISD::UNPACK_LOW", SDT_ZVecUnaryConv>;
+def z_unpackl_low : SDNode<"SystemZISD::UNPACKL_LOW", SDT_ZVecUnaryConv>;
+def z_vshl_by_scalar : SDNode<"SystemZISD::VSHL_BY_SCALAR",
+ SDT_ZVecBinaryInt>;
+def z_vsrl_by_scalar : SDNode<"SystemZISD::VSRL_BY_SCALAR",
+ SDT_ZVecBinaryInt>;
+def z_vsra_by_scalar : SDNode<"SystemZISD::VSRA_BY_SCALAR",
+ SDT_ZVecBinaryInt>;
+def z_vsum : SDNode<"SystemZISD::VSUM", SDT_ZVecBinaryConv>;
+def z_vicmpe : SDNode<"SystemZISD::VICMPE", SDT_ZVecBinary>;
+def z_vicmph : SDNode<"SystemZISD::VICMPH", SDT_ZVecBinary>;
+def z_vicmphl : SDNode<"SystemZISD::VICMPHL", SDT_ZVecBinary>;
+def z_vicmpes : SDNode<"SystemZISD::VICMPES", SDT_ZVecBinary,
+ [SDNPOutGlue]>;
+def z_vicmphs : SDNode<"SystemZISD::VICMPHS", SDT_ZVecBinary,
+ [SDNPOutGlue]>;
+def z_vicmphls : SDNode<"SystemZISD::VICMPHLS", SDT_ZVecBinary,
+ [SDNPOutGlue]>;
+def z_vfcmpe : SDNode<"SystemZISD::VFCMPE", SDT_ZVecBinaryConv>;
+def z_vfcmph : SDNode<"SystemZISD::VFCMPH", SDT_ZVecBinaryConv>;
+def z_vfcmphe : SDNode<"SystemZISD::VFCMPHE", SDT_ZVecBinaryConv>;
+def z_vfcmpes : SDNode<"SystemZISD::VFCMPES", SDT_ZVecBinaryConv,
+ [SDNPOutGlue]>;
+def z_vfcmphs : SDNode<"SystemZISD::VFCMPHS", SDT_ZVecBinaryConv,
+ [SDNPOutGlue]>;
+def z_vfcmphes : SDNode<"SystemZISD::VFCMPHES", SDT_ZVecBinaryConv,
+ [SDNPOutGlue]>;
+def z_vextend : SDNode<"SystemZISD::VEXTEND", SDT_ZVecUnaryConv>;
+def z_vround : SDNode<"SystemZISD::VROUND", SDT_ZVecUnaryConv>;
+def z_vtm : SDNode<"SystemZISD::VTM", SDT_ZCmp, [SDNPOutGlue]>;
+def z_vfae_cc : SDNode<"SystemZISD::VFAE_CC", SDT_ZVecTernaryInt,
+ [SDNPOutGlue]>;
+def z_vfaez_cc : SDNode<"SystemZISD::VFAEZ_CC", SDT_ZVecTernaryInt,
+ [SDNPOutGlue]>;
+def z_vfee_cc : SDNode<"SystemZISD::VFEE_CC", SDT_ZVecBinary,
+ [SDNPOutGlue]>;
+def z_vfeez_cc : SDNode<"SystemZISD::VFEEZ_CC", SDT_ZVecBinary,
+ [SDNPOutGlue]>;
+def z_vfene_cc : SDNode<"SystemZISD::VFENE_CC", SDT_ZVecBinary,
+ [SDNPOutGlue]>;
+def z_vfenez_cc : SDNode<"SystemZISD::VFENEZ_CC", SDT_ZVecBinary,
+ [SDNPOutGlue]>;
+def z_vistr_cc : SDNode<"SystemZISD::VISTR_CC", SDT_ZVecUnary,
+ [SDNPOutGlue]>;
+def z_vstrc_cc : SDNode<"SystemZISD::VSTRC_CC", SDT_ZVecQuaternaryInt,
+ [SDNPOutGlue]>;
+def z_vstrcz_cc : SDNode<"SystemZISD::VSTRCZ_CC",
+ SDT_ZVecQuaternaryInt, [SDNPOutGlue]>;
+def z_vftci : SDNode<"SystemZISD::VFTCI", SDT_ZVecBinaryConvInt,
+ [SDNPOutGlue]>;
+
+class AtomicWOp<string name, SDTypeProfile profile = SDT_ZAtomicLoadBinaryW>
+ : SDNode<"SystemZISD::"##name, profile,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
+
+def z_atomic_swapw : AtomicWOp<"ATOMIC_SWAPW">;
+def z_atomic_loadw_add : AtomicWOp<"ATOMIC_LOADW_ADD">;
+def z_atomic_loadw_sub : AtomicWOp<"ATOMIC_LOADW_SUB">;
+def z_atomic_loadw_and : AtomicWOp<"ATOMIC_LOADW_AND">;
+def z_atomic_loadw_or : AtomicWOp<"ATOMIC_LOADW_OR">;
+def z_atomic_loadw_xor : AtomicWOp<"ATOMIC_LOADW_XOR">;
+def z_atomic_loadw_nand : AtomicWOp<"ATOMIC_LOADW_NAND">;
+def z_atomic_loadw_min : AtomicWOp<"ATOMIC_LOADW_MIN">;
+def z_atomic_loadw_max : AtomicWOp<"ATOMIC_LOADW_MAX">;
+def z_atomic_loadw_umin : AtomicWOp<"ATOMIC_LOADW_UMIN">;
+def z_atomic_loadw_umax : AtomicWOp<"ATOMIC_LOADW_UMAX">;
+def z_atomic_cmp_swapw : AtomicWOp<"ATOMIC_CMP_SWAPW", SDT_ZAtomicCmpSwapW>;
+
+def z_mvc : SDNode<"SystemZISD::MVC", SDT_ZMemMemLength,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def z_mvc_loop : SDNode<"SystemZISD::MVC_LOOP", SDT_ZMemMemLoop,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def z_nc : SDNode<"SystemZISD::NC", SDT_ZMemMemLength,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def z_nc_loop : SDNode<"SystemZISD::NC_LOOP", SDT_ZMemMemLoop,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def z_oc : SDNode<"SystemZISD::OC", SDT_ZMemMemLength,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def z_oc_loop : SDNode<"SystemZISD::OC_LOOP", SDT_ZMemMemLoop,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def z_xc : SDNode<"SystemZISD::XC", SDT_ZMemMemLength,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def z_xc_loop : SDNode<"SystemZISD::XC_LOOP", SDT_ZMemMemLoop,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def z_clc : SDNode<"SystemZISD::CLC", SDT_ZMemMemLength,
+ [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>;
+def z_clc_loop : SDNode<"SystemZISD::CLC_LOOP", SDT_ZMemMemLoop,
+ [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>;
+def z_strcmp : SDNode<"SystemZISD::STRCMP", SDT_ZString,
+ [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>;
+def z_stpcpy : SDNode<"SystemZISD::STPCPY", SDT_ZString,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def z_search_string : SDNode<"SystemZISD::SEARCH_STRING", SDT_ZString,
+ [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>;
+def z_ipm : SDNode<"SystemZISD::IPM", SDT_ZI32Intrinsic,
+ [SDNPInGlue]>;
+def z_prefetch : SDNode<"SystemZISD::PREFETCH", SDT_ZPrefetch,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore,
+ SDNPMemOperand]>;
+
+def z_tbegin : SDNode<"SystemZISD::TBEGIN", SDT_ZTBegin,
+ [SDNPHasChain, SDNPOutGlue, SDNPMayStore,
+ SDNPSideEffect]>;
+def z_tbegin_nofloat : SDNode<"SystemZISD::TBEGIN_NOFLOAT", SDT_ZTBegin,
+ [SDNPHasChain, SDNPOutGlue, SDNPMayStore,
+ SDNPSideEffect]>;
+def z_tend : SDNode<"SystemZISD::TEND", SDTNone,
+ [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
+
+def z_vshl : SDNode<"ISD::SHL", SDT_ZVecBinary>;
+def z_vsra : SDNode<"ISD::SRA", SDT_ZVecBinary>;
+def z_vsrl : SDNode<"ISD::SRL", SDT_ZVecBinary>;
+
+//===----------------------------------------------------------------------===//
+// Pattern fragments
+//===----------------------------------------------------------------------===//
+
+def z_lrvh : PatFrag<(ops node:$addr), (z_loadbswap node:$addr, i16)>;
+def z_lrv : PatFrag<(ops node:$addr), (z_loadbswap node:$addr, i32)>;
+def z_lrvg : PatFrag<(ops node:$addr), (z_loadbswap node:$addr, i64)>;
+
+def z_strvh : PatFrag<(ops node:$src, node:$addr),
+ (z_storebswap node:$src, node:$addr, i16)>;
+def z_strv : PatFrag<(ops node:$src, node:$addr),
+ (z_storebswap node:$src, node:$addr, i32)>;
+def z_strvg : PatFrag<(ops node:$src, node:$addr),
+ (z_storebswap node:$src, node:$addr, i64)>;
+
+// Signed and unsigned comparisons.
+def z_scmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{
+ unsigned Type = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ return Type != SystemZICMP::UnsignedOnly;
+}]>;
+def z_ucmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{
+ unsigned Type = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ return Type != SystemZICMP::SignedOnly;
+}]>;
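+
+// Note that a comparison whose type operand is SystemZICMP::Any satisfies
+// both fragments, so it can be implemented by either a signed or an
+// unsigned comparison instruction.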
+
+// Register- and memory-based TEST UNDER MASK.
+def z_tm_reg : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, imm)>;
+def z_tm_mem : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, 0)>;
+
+// Register sign-extend operations. Sub-32-bit values are represented as i32s.
+def sext8 : PatFrag<(ops node:$src), (sext_inreg node:$src, i8)>;
+def sext16 : PatFrag<(ops node:$src), (sext_inreg node:$src, i16)>;
+def sext32 : PatFrag<(ops node:$src), (sext (i32 node:$src))>;
+
+// Match extensions of an i32 to an i64, followed by an in-register sign
+// extension from a sub-i32 value.
+def sext8dbl : PatFrag<(ops node:$src), (sext8 (anyext node:$src))>;
+def sext16dbl : PatFrag<(ops node:$src), (sext16 (anyext node:$src))>;
+
+// Register zero-extend operations. Sub-32-bit values are represented as i32s.
+def zext8 : PatFrag<(ops node:$src), (and node:$src, 0xff)>;
+def zext16 : PatFrag<(ops node:$src), (and node:$src, 0xffff)>;
+def zext32 : PatFrag<(ops node:$src), (zext (i32 node:$src))>;
+
+// Extending loads in which the extension type can be signed.
+def asextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
+ unsigned Type = cast<LoadSDNode>(N)->getExtensionType();
+ return Type == ISD::EXTLOAD || Type == ISD::SEXTLOAD;
+}]>;
+def asextloadi8 : PatFrag<(ops node:$ptr), (asextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+def asextloadi16 : PatFrag<(ops node:$ptr), (asextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def asextloadi32 : PatFrag<(ops node:$ptr), (asextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
+// Extending loads in which the extension type can be unsigned.
+def azextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
+ unsigned Type = cast<LoadSDNode>(N)->getExtensionType();
+ return Type == ISD::EXTLOAD || Type == ISD::ZEXTLOAD;
+}]>;
+def azextloadi8 : PatFrag<(ops node:$ptr), (azextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+def azextloadi16 : PatFrag<(ops node:$ptr), (azextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def azextloadi32 : PatFrag<(ops node:$ptr), (azextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
+// Extending loads in which the extension type doesn't matter.
+def anyextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getExtensionType() != ISD::NON_EXTLOAD;
+}]>;
+def anyextloadi8 : PatFrag<(ops node:$ptr), (anyextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+def anyextloadi16 : PatFrag<(ops node:$ptr), (anyextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def anyextloadi32 : PatFrag<(ops node:$ptr), (anyextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
+// Aligned loads.
+class AlignedLoad<SDPatternOperator load>
+ : PatFrag<(ops node:$addr), (load node:$addr), [{
+ auto *Load = cast<LoadSDNode>(N);
+ return Load->getAlignment() >= Load->getMemoryVT().getStoreSize();
+}]>;
+def aligned_load : AlignedLoad<load>;
+def aligned_asextloadi16 : AlignedLoad<asextloadi16>;
+def aligned_asextloadi32 : AlignedLoad<asextloadi32>;
+def aligned_azextloadi16 : AlignedLoad<azextloadi16>;
+def aligned_azextloadi32 : AlignedLoad<azextloadi32>;
+
+// Aligned stores.
+class AlignedStore<SDPatternOperator store>
+ : PatFrag<(ops node:$src, node:$addr), (store node:$src, node:$addr), [{
+ auto *Store = cast<StoreSDNode>(N);
+ return Store->getAlignment() >= Store->getMemoryVT().getStoreSize();
+}]>;
+def aligned_store : AlignedStore<store>;
+def aligned_truncstorei16 : AlignedStore<truncstorei16>;
+def aligned_truncstorei32 : AlignedStore<truncstorei32>;
+
+// Non-volatile loads. Used for instructions that might access the storage
+// location multiple times.
+class NonvolatileLoad<SDPatternOperator load>
+ : PatFrag<(ops node:$addr), (load node:$addr), [{
+ auto *Load = cast<LoadSDNode>(N);
+ return !Load->isVolatile();
+}]>;
+def nonvolatile_load : NonvolatileLoad<load>;
+def nonvolatile_anyextloadi8 : NonvolatileLoad<anyextloadi8>;
+def nonvolatile_anyextloadi16 : NonvolatileLoad<anyextloadi16>;
+def nonvolatile_anyextloadi32 : NonvolatileLoad<anyextloadi32>;
+
+// Non-volatile stores.
+class NonvolatileStore<SDPatternOperator store>
+ : PatFrag<(ops node:$src, node:$addr), (store node:$src, node:$addr), [{
+ auto *Store = cast<StoreSDNode>(N);
+ return !Store->isVolatile();
+}]>;
+def nonvolatile_store : NonvolatileStore<store>;
+def nonvolatile_truncstorei8 : NonvolatileStore<truncstorei8>;
+def nonvolatile_truncstorei16 : NonvolatileStore<truncstorei16>;
+def nonvolatile_truncstorei32 : NonvolatileStore<truncstorei32>;
+
+// A store of a load that can be implemented using MVC.
+def mvc_store : PatFrag<(ops node:$value, node:$addr),
+ (unindexedstore node:$value, node:$addr),
+ [{ return storeLoadCanUseMVC(N); }]>;
+
+// Binary read-modify-write operations on memory in which the other
+// operand is also memory and for which block operations like NC can
+// be used. There are two patterns for each operator, depending on
+// which operand contains the "other" load.
+multiclass block_op<SDPatternOperator operator> {
+ def "1" : PatFrag<(ops node:$value, node:$addr),
+ (unindexedstore (operator node:$value,
+ (unindexedload node:$addr)),
+ node:$addr),
+ [{ return storeLoadCanUseBlockBinary(N, 0); }]>;
+ def "2" : PatFrag<(ops node:$value, node:$addr),
+ (unindexedstore (operator (unindexedload node:$addr),
+ node:$value),
+ node:$addr),
+ [{ return storeLoadCanUseBlockBinary(N, 1); }]>;
+}
+defm block_and : block_op<and>;
+defm block_or : block_op<or>;
+defm block_xor : block_op<xor>;
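+
+// For example, for a statement like "*ptr &= val", block_and1 matches the
+// form (store (and $value, (load $addr)), $addr) and block_and2 matches
+// (store (and (load $addr), $value), $addr).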
+
+// Insertions.
+def inserti8 : PatFrag<(ops node:$src1, node:$src2),
+ (or (and node:$src1, -256), node:$src2)>;
+def insertll : PatFrag<(ops node:$src1, node:$src2),
+ (or (and node:$src1, 0xffffffffffff0000), node:$src2)>;
+def insertlh : PatFrag<(ops node:$src1, node:$src2),
+ (or (and node:$src1, 0xffffffff0000ffff), node:$src2)>;
+def inserthl : PatFrag<(ops node:$src1, node:$src2),
+ (or (and node:$src1, 0xffff0000ffffffff), node:$src2)>;
+def inserthh : PatFrag<(ops node:$src1, node:$src2),
+ (or (and node:$src1, 0x0000ffffffffffff), node:$src2)>;
+def insertlf : PatFrag<(ops node:$src1, node:$src2),
+ (or (and node:$src1, 0xffffffff00000000), node:$src2)>;
+def inserthf : PatFrag<(ops node:$src1, node:$src2),
+ (or (and node:$src1, 0x00000000ffffffff), node:$src2)>;
+
+// ORs that can be treated as insertions.
+def or_as_inserti8 : PatFrag<(ops node:$src1, node:$src2),
+ (or node:$src1, node:$src2), [{
+ unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
+ return CurDAG->MaskedValueIsZero(N->getOperand(0),
+ APInt::getLowBitsSet(BitWidth, 8));
+}]>;
+
+// ORs that can be treated as reversed insertions.
+def or_as_revinserti8 : PatFrag<(ops node:$src1, node:$src2),
+ (or node:$src1, node:$src2), [{
+ unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
+ return CurDAG->MaskedValueIsZero(N->getOperand(1),
+ APInt::getLowBitsSet(BitWidth, 8));
+}]>;
+
+// Negative integer absolute.
+def z_inegabs : PatFrag<(ops node:$src), (ineg (z_iabs node:$src))>;
+
+// Integer absolute, matching the canonical form generated by DAGCombiner.
+def z_iabs32 : PatFrag<(ops node:$src),
+ (xor (add node:$src, (sra node:$src, (i32 31))),
+ (sra node:$src, (i32 31)))>;
+def z_iabs64 : PatFrag<(ops node:$src),
+ (xor (add node:$src, (sra node:$src, (i32 63))),
+ (sra node:$src, (i32 63)))>;
+def z_inegabs32 : PatFrag<(ops node:$src), (ineg (z_iabs32 node:$src))>;
+def z_inegabs64 : PatFrag<(ops node:$src), (ineg (z_iabs64 node:$src))>;
+
+// Integer multiply-and-add
+def z_muladd : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (add (mul node:$src1, node:$src2), node:$src3)>;
+
+// Fused multiply-subtract, using the natural operand order.
+def fms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (fma node:$src1, node:$src2, (fneg node:$src3))>;
+
+// Fused multiply-add and multiply-subtract, but with the order of the
+// operands matching SystemZ's MA and MS instructions.
+def z_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (fma node:$src2, node:$src3, node:$src1)>;
+def z_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (fma node:$src2, node:$src3, (fneg node:$src1))>;
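+
+// For example, MADBR R1, R3, R2 computes R1 + R3 * R2, with the addend in
+// the first operand; z_fma therefore maps $src1 to the addend position of
+// the generic fma node.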
+
+// Floating-point negative absolute.
+def fnabs : PatFrag<(ops node:$ptr), (fneg (fabs node:$ptr))>;
+
+// Create a unary operator that loads from memory and then performs
+// the given operation on it.
+class loadu<SDPatternOperator operator, SDPatternOperator load = load>
+ : PatFrag<(ops node:$addr), (operator (load node:$addr))>;
+
+// Create a store operator that performs the given unary operation
+// on the value before storing it.
+class storeu<SDPatternOperator operator, SDPatternOperator store = store>
+ : PatFrag<(ops node:$value, node:$addr),
+ (store (operator node:$value), node:$addr)>;
+
+// Create a store operator that performs the given inherent operation
+// and stores the resulting value.
+class storei<SDPatternOperator operator, SDPatternOperator store = store>
+ : PatFrag<(ops node:$addr),
+ (store (operator), node:$addr)>;
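+
+// For example, loadu<fneg> matches (fneg (load node:$addr)), and
+// storeu<fneg> matches (store (fneg node:$value), node:$addr).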
+
+// Vector representation of all-zeros and all-ones.
+def z_vzero : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 0))))>;
+def z_vones : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 65535))))>;
+
+// Load a scalar and replicate it in all elements of a vector.
+class z_replicate_load<ValueType scalartype, SDPatternOperator load>
+ : PatFrag<(ops node:$addr),
+ (z_replicate (scalartype (load node:$addr)))>;
+def z_replicate_loadi8 : z_replicate_load<i32, anyextloadi8>;
+def z_replicate_loadi16 : z_replicate_load<i32, anyextloadi16>;
+def z_replicate_loadi32 : z_replicate_load<i32, load>;
+def z_replicate_loadi64 : z_replicate_load<i64, load>;
+def z_replicate_loadf32 : z_replicate_load<f32, load>;
+def z_replicate_loadf64 : z_replicate_load<f64, load>;
+
+// Load a scalar and insert it into a single element of a vector.
+class z_vle<ValueType scalartype, SDPatternOperator load>
+ : PatFrag<(ops node:$vec, node:$addr, node:$index),
+ (z_vector_insert node:$vec, (scalartype (load node:$addr)),
+ node:$index)>;
+def z_vlei8 : z_vle<i32, anyextloadi8>;
+def z_vlei16 : z_vle<i32, anyextloadi16>;
+def z_vlei32 : z_vle<i32, load>;
+def z_vlei64 : z_vle<i64, load>;
+def z_vlef32 : z_vle<f32, load>;
+def z_vlef64 : z_vle<f64, load>;
+
+// Load a scalar and insert it into the low element of the high i64 of a
+// zeroed vector.
+class z_vllez<ValueType scalartype, SDPatternOperator load, int index>
+ : PatFrag<(ops node:$addr),
+ (z_vector_insert (z_vzero),
+ (scalartype (load node:$addr)), (i32 index))>;
+def z_vllezi8 : z_vllez<i32, anyextloadi8, 7>;
+def z_vllezi16 : z_vllez<i32, anyextloadi16, 3>;
+def z_vllezi32 : z_vllez<i32, load, 1>;
+def z_vllezi64 : PatFrag<(ops node:$addr),
+ (z_join_dwords (i64 (load node:$addr)), (i64 0))>;
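+
+// The insertion indices above (7, 3 and 1) select the rightmost element of
+// the high i64: with big-endian element numbering, bytes 0-7 form the high
+// doubleword, so its least significant byte is element 7, and likewise
+// element 3 for i16 and element 1 for i32.
+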
+// We use high merges to form a v4f32 from four f32s. Propagating zero
+// into all elements but index 1 gives this expression.
+def z_vllezf32 : PatFrag<(ops node:$addr),
+ (bitconvert
+ (z_merge_high
+ (v2i64
+ (z_unpackl_high
+ (v4i32
+ (bitconvert
+ (v4f32 (scalar_to_vector
+ (f32 (load node:$addr)))))))),
+ (v2i64 (z_vzero))))>;
+def z_vllezf64 : PatFrag<(ops node:$addr),
+ (z_merge_high
+ (scalar_to_vector (f64 (load node:$addr))),
+ (z_vzero))>;
+
+// Store one element of a vector.
+class z_vste<ValueType scalartype, SDPatternOperator store>
+ : PatFrag<(ops node:$vec, node:$addr, node:$index),
+ (store (scalartype (z_vector_extract node:$vec, node:$index)),
+ node:$addr)>;
+def z_vstei8 : z_vste<i32, truncstorei8>;
+def z_vstei16 : z_vste<i32, truncstorei16>;
+def z_vstei32 : z_vste<i32, store>;
+def z_vstei64 : z_vste<i64, store>;
+def z_vstef32 : z_vste<f32, store>;
+def z_vstef64 : z_vste<f64, store>;
+
+// Arithmetic negation on vectors.
+def z_vneg : PatFrag<(ops node:$x), (sub (z_vzero), node:$x)>;
+
+// Bitwise negation on vectors.
+def z_vnot : PatFrag<(ops node:$x), (xor node:$x, (z_vones))>;
+
+// Signed "integer greater than zero" on vectors.
+def z_vicmph_zero : PatFrag<(ops node:$x), (z_vicmph node:$x, (z_vzero))>;
+
+// Signed "integer less than zero" on vectors.
+def z_vicmpl_zero : PatFrag<(ops node:$x), (z_vicmph (z_vzero), node:$x)>;
+
+// Integer absolute on vectors.
+class z_viabs<int shift>
+ : PatFrag<(ops node:$src),
+ (xor (add node:$src, (z_vsra_by_scalar node:$src, (i32 shift))),
+ (z_vsra_by_scalar node:$src, (i32 shift)))>;
+def z_viabs8 : z_viabs<7>;
+def z_viabs16 : z_viabs<15>;
+def z_viabs32 : z_viabs<31>;
+def z_viabs64 : z_viabs<63>;
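+
+// This is the usual branch-free absolute value: with m = x >> (width - 1)
+// (an arithmetic shift, so m is 0 for x >= 0 and all-ones for x < 0),
+// (x + m) ^ m leaves x unchanged when m is 0 and gives ~(x - 1) = -x when
+// m is all-ones.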
+
+// Sign-extend the i64 elements of a vector.
+class z_vse<int shift>
+ : PatFrag<(ops node:$src),
+ (z_vsra_by_scalar (z_vshl_by_scalar node:$src, shift), shift)>;
+def z_vsei8 : z_vse<56>;
+def z_vsei16 : z_vse<48>;
+def z_vsei32 : z_vse<32>;
+
+// ...and again with the extensions being done on individual i64 scalars.
+class z_vse_by_parts<SDPatternOperator operator, int index1, int index2>
+ : PatFrag<(ops node:$src),
+ (z_join_dwords
+ (operator (z_vector_extract node:$src, index1)),
+ (operator (z_vector_extract node:$src, index2)))>;
+def z_vsei8_by_parts : z_vse_by_parts<sext8dbl, 7, 15>;
+def z_vsei16_by_parts : z_vse_by_parts<sext16dbl, 3, 7>;
+def z_vsei32_by_parts : z_vse_by_parts<sext32, 1, 3>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td b/contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td
new file mode 100644
index 000000000000..16a7ed784d70
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td
@@ -0,0 +1,169 @@
+//===-- SystemZPatterns.td - SystemZ-specific pattern rules ---*- tblgen-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Record that INSN performs a 64-bit version of unary operator OPERATOR
+// in which the operand is sign-extended from 32 to 64 bits.
+multiclass SXU<SDPatternOperator operator, Instruction insn> {
+ def : Pat<(operator (sext (i32 GR32:$src))),
+ (insn GR32:$src)>;
+ def : Pat<(operator (sext_inreg GR64:$src, i32)),
+ (insn (EXTRACT_SUBREG GR64:$src, subreg_l32))>;
+}
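+
+// A typical instantiation looks something like:
+//   defm : SXU<ineg, LCGFR>;
+// recording that LCGFR implements 64-bit negation of a sign-extended
+// 32-bit operand.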
+
+// Record that INSN performs a 64-bit version of binary operator OPERATOR
+// in which the first operand has class CLS and the second operand
+// is sign-extended from a 32-bit register.
+multiclass SXB<SDPatternOperator operator, RegisterOperand cls,
+ Instruction insn> {
+ def : Pat<(operator cls:$src1, (sext GR32:$src2)),
+ (insn cls:$src1, GR32:$src2)>;
+ def : Pat<(operator cls:$src1, (sext_inreg GR64:$src2, i32)),
+ (insn cls:$src1, (EXTRACT_SUBREG GR64:$src2, subreg_l32))>;
+}
+
+// Like SXB, but for zero extension.
+multiclass ZXB<SDPatternOperator operator, RegisterOperand cls,
+ Instruction insn> {
+ def : Pat<(operator cls:$src1, (zext GR32:$src2)),
+ (insn cls:$src1, GR32:$src2)>;
+ def : Pat<(operator cls:$src1, (and GR64:$src2, 0xffffffff)),
+ (insn cls:$src1, (EXTRACT_SUBREG GR64:$src2, subreg_l32))>;
+}
+
+// Record that INSN performs a binary read-modify-write operation,
+// with LOAD, OPERATOR and STORE being the read, modify and write
+// respectively. MODE is the addressing mode and IMM is the type
+// of the second operand.
+class RMWI<SDPatternOperator load, SDPatternOperator operator,
+ SDPatternOperator store, AddressingMode mode,
+ PatFrag imm, Instruction insn>
+ : Pat<(store (operator (load mode:$addr), imm:$src), mode:$addr),
+ (insn mode:$addr, (UIMM8 imm:$src))>;
+
+// Record that INSN performs binary operation OPERATOR on a byte
+// memory location, using addressing mode MODE.
+multiclass RMWIByte<SDPatternOperator operator, AddressingMode mode,
+ Instruction insn> {
+ def : RMWI<anyextloadi8, operator, truncstorei8, mode, imm32, insn>;
+ def : RMWI<anyextloadi8, operator, truncstorei8, mode, imm64, insn>;
+}
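+
+// A typical instantiation looks something like:
+//   defm : RMWIByte<and, bdaddr12pair, NI>;
+// so that ANDing an immediate into a byte in memory becomes a single NI.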
+
+// Record that INSN performs insertion TYPE into a register of class CLS.
+// The inserted operand is loaded using LOAD from an address of mode MODE.
+multiclass InsertMem<string type, Instruction insn, RegisterOperand cls,
+ SDPatternOperator load, AddressingMode mode> {
+ def : Pat<(!cast<SDPatternOperator>("or_as_"##type)
+ cls:$src1, (load mode:$src2)),
+ (insn cls:$src1, mode:$src2)>;
+ def : Pat<(!cast<SDPatternOperator>("or_as_rev"##type)
+ (load mode:$src2), cls:$src1),
+ (insn cls:$src1, mode:$src2)>;
+}
+
+// INSN stores the low 32 bits of a GPR to a memory with addressing mode MODE.
+// Record that it is equivalent to using OPERATOR to store a GR64.
+class StoreGR64<Instruction insn, SDPatternOperator operator,
+ AddressingMode mode>
+ : Pat<(operator GR64:$R1, mode:$XBD2),
+ (insn (EXTRACT_SUBREG GR64:$R1, subreg_l32), mode:$XBD2)>;
+
+// INSN and INSNY are an RX/RXY pair of instructions that store the low
+// 32 bits of a GPR to memory. Record that they are equivalent to using
+// OPERATOR to store a GR64.
+multiclass StoreGR64Pair<Instruction insn, Instruction insny,
+ SDPatternOperator operator> {
+ def : StoreGR64<insn, operator, bdxaddr12pair>;
+ def : StoreGR64<insny, operator, bdxaddr20pair>;
+}
+
+// INSN stores the low 32 bits of a GPR using PC-relative addressing.
+// Record that it is equivalent to using OPERATOR to store a GR64.
+class StoreGR64PC<Instruction insn, SDPatternOperator operator>
+ : Pat<(operator GR64:$R1, pcrel32:$XBD2),
+ (insn (EXTRACT_SUBREG GR64:$R1, subreg_l32), pcrel32:$XBD2)> {
+ // We want PC-relative addresses to be tried ahead of BD and BDX addresses.
+ // However, BDXs have two extra operands and are therefore 6 units more
+ // complex.
+ let AddedComplexity = 7;
+}
+
+// INSN and INSNINV conditionally store the low 32 bits of a GPR to memory,
+// with INSN storing when the condition is true and INSNINV storing when the
+// condition is false. Record that they are equivalent to a LOAD/select/STORE
+// sequence for GR64s.
+multiclass CondStores64<Instruction insn, Instruction insninv,
+ SDPatternOperator store, SDPatternOperator load,
+ AddressingMode mode> {
+ def : Pat<(store (z_select_ccmask GR64:$new, (load mode:$addr),
+ imm32zx4:$valid, imm32zx4:$cc),
+ mode:$addr),
+ (insn (EXTRACT_SUBREG GR64:$new, subreg_l32), mode:$addr,
+ imm32zx4:$valid, imm32zx4:$cc)>;
+ def : Pat<(store (z_select_ccmask (load mode:$addr), GR64:$new,
+ imm32zx4:$valid, imm32zx4:$cc),
+ mode:$addr),
+ (insninv (EXTRACT_SUBREG GR64:$new, subreg_l32), mode:$addr,
+ imm32zx4:$valid, imm32zx4:$cc)>;
+}
+
+// Try to use MVC instruction INSN for a load of type LOAD followed by a store
+// of the same size. VT is the type of the intermediate (legalized) value and
+// LENGTH is the number of bytes loaded by LOAD.
+multiclass MVCLoadStore<SDPatternOperator load, ValueType vt, Instruction insn,
+ bits<5> length> {
+ def : Pat<(mvc_store (vt (load bdaddr12only:$src)), bdaddr12only:$dest),
+ (insn bdaddr12only:$dest, bdaddr12only:$src, length)>;
+}
+
+// Use NC-like instruction INSN for block_op operation OPERATOR.
+// The other operand is a load of type LOAD, which accesses LENGTH bytes.
+// VT is the intermediate legalized type in which the binary operation
+// is actually done.
+multiclass BinaryLoadStore<SDPatternOperator operator, SDPatternOperator load,
+ ValueType vt, Instruction insn, bits<5> length> {
+ def : Pat<(operator (vt (load bdaddr12only:$src)), bdaddr12only:$dest),
+ (insn bdaddr12only:$dest, bdaddr12only:$src, length)>;
+}
+
+// A convenient way of generating all block peepholes for a particular
+// LOAD/VT/LENGTH combination.
+multiclass BlockLoadStore<SDPatternOperator load, ValueType vt,
+ Instruction mvc, Instruction nc, Instruction oc,
+ Instruction xc, bits<5> length> {
+ defm : MVCLoadStore<load, vt, mvc, length>;
+ defm : BinaryLoadStore<block_and1, load, vt, nc, length>;
+ defm : BinaryLoadStore<block_and2, load, vt, nc, length>;
+ defm : BinaryLoadStore<block_or1, load, vt, oc, length>;
+ defm : BinaryLoadStore<block_or2, load, vt, oc, length>;
+ defm : BinaryLoadStore<block_xor1, load, vt, xc, length>;
+ defm : BinaryLoadStore<block_xor2, load, vt, xc, length>;
+}
+
+// Record that INSN is a LOAD AND TEST that can be used to compare
+// registers in CLS against zero. The instruction has separate R1 and R2
+// operands, but they must be the same when the instruction is used like this.
+multiclass CompareZeroFP<Instruction insn, RegisterOperand cls> {
+ def : Pat<(z_fcmp cls:$reg, (fpimm0)), (insn cls:$reg, cls:$reg)>;
+ // The sign of the zero makes no difference.
+ def : Pat<(z_fcmp cls:$reg, (fpimmneg0)), (insn cls:$reg, cls:$reg)>;
+}
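+
+// A typical instantiation looks something like:
+//   defm : CompareZeroFP<LTEBR, FP32>;
+// turning comparisons of an FP32 register against +/-0.0 into a
+// LOAD AND TEST of the register against itself.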
+
+// Use INSN to perform binary operation OPERATOR of type VT
+// on registers of class CLS.
+class BinaryRRWithType<Instruction insn, RegisterOperand cls,
+ SDPatternOperator operator, ValueType vt>
+ : Pat<(vt (operator cls:$x, cls:$y)), (insn cls:$x, cls:$y)>;
+
+// Use INSN to perform conversion operation OPERATOR, with the input being
+// TR2 and the output being TR1. SUPPRESS is 4 to suppress inexact conditions
+// and 0 to allow them. MODE is the rounding mode to use.
+class FPConversion<Instruction insn, SDPatternOperator operator, TypedReg tr1,
+ TypedReg tr2, bits<3> suppress, bits<4> mode>
+ : Pat<(tr1.vt (operator (tr2.vt tr2.op:$vec))),
+ (insn tr2.op:$vec, suppress, mode)>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZProcessors.td b/contrib/llvm/lib/Target/SystemZ/SystemZProcessors.td
new file mode 100644
index 000000000000..1cdc0949ff4a
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZProcessors.td
@@ -0,0 +1,35 @@
+//===-- SystemZ.td - SystemZ processors and features ---------*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Processor definitions.
+//
+// For compatibility with other compilers on the platform, each model can
+// be identified either by the system name (e.g. z10) or the level of the
+// architecture the model supports, as identified by the edition level
+// of the z/Architecture Principles of Operation document (e.g. arch8).
+//
+// The minimum architecture level supported by LLVM is as defined in
+// the Eighth Edition of the PoP (i.e. as implemented on z10).
+//
+//===----------------------------------------------------------------------===//
+
+def : ProcessorModel<"generic", NoSchedModel, []>;
+
+def : ProcessorModel<"arch8", NoSchedModel, Arch8SupportedFeatures.List>;
+def : ProcessorModel<"z10", NoSchedModel, Arch8SupportedFeatures.List>;
+
+def : ProcessorModel<"arch9", Z196Model, Arch9SupportedFeatures.List>;
+def : ProcessorModel<"z196", Z196Model, Arch9SupportedFeatures.List>;
+
+def : ProcessorModel<"arch10", ZEC12Model, Arch10SupportedFeatures.List>;
+def : ProcessorModel<"zEC12", ZEC12Model, Arch10SupportedFeatures.List>;
+
+def : ProcessorModel<"arch11", Z13Model, Arch11SupportedFeatures.List>;
+def : ProcessorModel<"z13", Z13Model, Arch11SupportedFeatures.List>;
+
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
new file mode 100644
index 000000000000..d14a0fb0b0b2
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -0,0 +1,159 @@
+//===-- SystemZRegisterInfo.cpp - SystemZ register information ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZRegisterInfo.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZSubtarget.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+using namespace llvm;
+
+#define GET_REGINFO_TARGET_DESC
+#include "SystemZGenRegisterInfo.inc"
+
+SystemZRegisterInfo::SystemZRegisterInfo()
+ : SystemZGenRegisterInfo(SystemZ::R14D) {}
+
+const MCPhysReg *
+SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ if (MF->getSubtarget().getTargetLowering()->supportSwiftError() &&
+ MF->getFunction()->getAttributes().hasAttrSomewhere(
+ Attribute::SwiftError))
+ return CSR_SystemZ_SwiftError_SaveList;
+ return CSR_SystemZ_SaveList;
+}
+
+const uint32_t *
+SystemZRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
+ if (MF.getSubtarget().getTargetLowering()->supportSwiftError() &&
+ MF.getFunction()->getAttributes().hasAttrSomewhere(
+ Attribute::SwiftError))
+ return CSR_SystemZ_SwiftError_RegMask;
+ return CSR_SystemZ_RegMask;
+}
+
+BitVector
+SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ const SystemZFrameLowering *TFI = getFrameLowering(MF);
+
+ if (TFI->hasFP(MF)) {
+ // R11D is the frame pointer. Reserve all aliases.
+ Reserved.set(SystemZ::R11D);
+ Reserved.set(SystemZ::R11L);
+ Reserved.set(SystemZ::R11H);
+ Reserved.set(SystemZ::R10Q);
+ }
+
+ // R15D is the stack pointer. Reserve all aliases.
+ Reserved.set(SystemZ::R15D);
+ Reserved.set(SystemZ::R15L);
+ Reserved.set(SystemZ::R15H);
+ Reserved.set(SystemZ::R14Q);
+
+ // A0 and A1 hold the thread pointer.
+ Reserved.set(SystemZ::A0);
+ Reserved.set(SystemZ::A1);
+
+ return Reserved;
+}
+
+void
+SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Outgoing arguments should be part of the frame");
+
+ MachineBasicBlock &MBB = *MI->getParent();
+ MachineFunction &MF = *MBB.getParent();
+ auto *TII =
+ static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const SystemZFrameLowering *TFI = getFrameLowering(MF);
+ DebugLoc DL = MI->getDebugLoc();
+
+ // Decompose the frame index into a base and offset.
+ int FrameIndex = MI->getOperand(FIOperandNum).getIndex();
+ unsigned BasePtr;
+ int64_t Offset = (TFI->getFrameIndexReference(MF, FrameIndex, BasePtr) +
+ MI->getOperand(FIOperandNum + 1).getImm());
+
+ // Special handling of dbg_value instructions.
+ if (MI->isDebugValue()) {
+ MI->getOperand(FIOperandNum).ChangeToRegister(BasePtr, /*isDef*/ false);
+ MI->getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+ return;
+ }
+
+ // See if the offset is in range, or if an equivalent instruction that
+ // accepts the offset exists.
+ unsigned Opcode = MI->getOpcode();
+ unsigned OpcodeForOffset = TII->getOpcodeForOffset(Opcode, Offset);
+ if (OpcodeForOffset) {
+ if (OpcodeForOffset == SystemZ::LE &&
+ MF.getSubtarget<SystemZSubtarget>().hasVector()) {
+ // If LE is ok for offset, use LDE instead on z13.
+ OpcodeForOffset = SystemZ::LDE32;
+ }
+ MI->getOperand(FIOperandNum).ChangeToRegister(BasePtr, false);
+  } else {
+    // Create an anchor point that is in range. Start at 0xffff so that
+    // we can use LLILH to load the immediate.
+ int64_t OldOffset = Offset;
+ int64_t Mask = 0xffff;
+ do {
+ Offset = OldOffset & Mask;
+ OpcodeForOffset = TII->getOpcodeForOffset(Opcode, Offset);
+ Mask >>= 1;
+ assert(Mask && "One offset must be OK");
+ } while (!OpcodeForOffset);
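+    // For example, with a 12-bit unsigned displacement and an offset of
+    // 0x12345, masking with 0xffff, 0x7fff and 0x3fff all leave 0x2345,
+    // which is still out of range, but 0x12345 & 0x1fff = 0x345 fits,
+    // giving an anchor (HighOffset) of 0x12000.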
+
+ unsigned ScratchReg =
+ MF.getRegInfo().createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+ int64_t HighOffset = OldOffset - Offset;
+
+ if (MI->getDesc().TSFlags & SystemZII::HasIndex
+ && MI->getOperand(FIOperandNum + 2).getReg() == 0) {
+ // Load the offset into the scratch register and use it as an index.
+ // The scratch register then dies here.
+ TII->loadImmediate(MBB, MI, ScratchReg, HighOffset);
+ MI->getOperand(FIOperandNum).ChangeToRegister(BasePtr, false);
+ MI->getOperand(FIOperandNum + 2).ChangeToRegister(ScratchReg,
+ false, false, true);
+ } else {
+ // Load the anchor address into a scratch register.
+ unsigned LAOpcode = TII->getOpcodeForOffset(SystemZ::LA, HighOffset);
+ if (LAOpcode)
+        BuildMI(MBB, MI, DL, TII->get(LAOpcode), ScratchReg)
+ .addReg(BasePtr).addImm(HighOffset).addReg(0);
+ else {
+ // Load the high offset into the scratch register and use it as
+ // an index.
+ TII->loadImmediate(MBB, MI, ScratchReg, HighOffset);
+        BuildMI(MBB, MI, DL, TII->get(SystemZ::AGR), ScratchReg)
+ .addReg(ScratchReg, RegState::Kill).addReg(BasePtr);
+ }
+
+ // Use the scratch register as the base. It then dies here.
+ MI->getOperand(FIOperandNum).ChangeToRegister(ScratchReg,
+ false, false, true);
+ }
+ }
+ MI->setDesc(TII->get(OpcodeForOffset));
+ MI->getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+}
+
+unsigned
+SystemZRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const SystemZFrameLowering *TFI = getFrameLowering(MF);
+ return TFI->hasFP(MF) ? SystemZ::R11D : SystemZ::R15D;
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
new file mode 100644
index 000000000000..e41c06c98af2
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -0,0 +1,67 @@
+//===-- SystemZRegisterInfo.h - SystemZ register information ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZREGISTERINFO_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZREGISTERINFO_H
+
+#include "SystemZ.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "SystemZGenRegisterInfo.inc"
+
+namespace llvm {
+
+namespace SystemZ {
+// Return the subreg to use for referring to the even and odd registers
+// in a GR128 pair. Is32bit says whether we want a GR32 or GR64.
+inline unsigned even128(bool Is32bit) {
+ return Is32bit ? subreg_hl32 : subreg_h64;
+}
+inline unsigned odd128(bool Is32bit) {
+ return Is32bit ? subreg_l32 : subreg_l64;
+}
+} // end namespace SystemZ
+
+struct SystemZRegisterInfo : public SystemZGenRegisterInfo {
+public:
+ SystemZRegisterInfo();
+
+ /// getPointerRegClass - Return the register class to use to hold pointers.
+ /// This is currently only used by LOAD_STACK_GUARD, which requires a non-%r0
+ /// register, hence ADDR64.
+ const TargetRegisterClass *
+ getPointerRegClass(const MachineFunction &MF,
+ unsigned Kind=0) const override {
+ return &SystemZ::ADDR64BitRegClass;
+ }
+
+ // Override TargetRegisterInfo.h.
+ bool requiresRegisterScavenging(const MachineFunction &MF) const override {
+ return true;
+ }
+ bool requiresFrameIndexScavenging(const MachineFunction &MF) const override {
+ return true;
+ }
+ bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override {
+ return true;
+ }
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const override;
+ BitVector getReservedRegs(const MachineFunction &MF) const override;
+ void eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const override;
+ unsigned getFrameRegister(const MachineFunction &MF) const override;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
new file mode 100644
index 000000000000..36809ea81dc1
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -0,0 +1,316 @@
+//==- SystemZRegisterInfo.td - SystemZ register definitions -*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Class definitions.
+//===----------------------------------------------------------------------===//
+
+class SystemZReg<string n> : Register<n> {
+ let Namespace = "SystemZ";
+}
+
+class SystemZRegWithSubregs<string n, list<Register> subregs>
+ : RegisterWithSubRegs<n, subregs> {
+ let Namespace = "SystemZ";
+}
+
+let Namespace = "SystemZ" in {
+def subreg_l32 : SubRegIndex<32, 0>; // Also acts as subreg_ll32.
+def subreg_h32 : SubRegIndex<32, 32>; // Also acts as subreg_lh32.
+def subreg_l64 : SubRegIndex<64, 0>;
+def subreg_h64 : SubRegIndex<64, 64>;
+def subreg_r32 : SubRegIndex<32, 32>; // Reinterpret a wider reg as 32 bits.
+def subreg_r64 : SubRegIndex<64, 64>; // Reinterpret a wider reg as 64 bits.
+def subreg_hh32 : ComposedSubRegIndex<subreg_h64, subreg_h32>;
+def subreg_hl32 : ComposedSubRegIndex<subreg_h64, subreg_l32>;
+def subreg_hr32 : ComposedSubRegIndex<subreg_h64, subreg_r32>;
+}
+
+// Define a register class that contains values of types TYPES and an
+// associated operand called NAME. SIZE is the size and alignment
+// of the registers and REGLIST is the list of individual registers.
+multiclass SystemZRegClass<string name, list<ValueType> types, int size,
+ dag regList, bit allocatable = 1> {
+ def AsmOperand : AsmOperandClass {
+ let Name = name;
+ let ParserMethod = "parse"##name;
+ let RenderMethod = "addRegOperands";
+ }
+ let isAllocatable = allocatable in
+ def Bit : RegisterClass<"SystemZ", types, size, regList> {
+ let Size = size;
+ }
+ def "" : RegisterOperand<!cast<RegisterClass>(name##"Bit")> {
+ let ParserMatchClass = !cast<AsmOperandClass>(name##"AsmOperand");
+ }
+}
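+
+// For example, "defm GR32 : SystemZRegClass<...>" produces three records:
+// GR32AsmOperand (with parser method parseGR32), the register class
+// GR32Bit, and the register operand GR32 that instruction definitions
+// refer to.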
+
+//===----------------------------------------------------------------------===//
+// General-purpose registers
+//===----------------------------------------------------------------------===//
+
+// Lower 32 bits of one of the 16 64-bit general-purpose registers
+class GPR32<bits<16> num, string n> : SystemZReg<n> {
+ let HWEncoding = num;
+}
+
+// One of the 16 64-bit general-purpose registers.
+class GPR64<bits<16> num, string n, GPR32 low, GPR32 high>
+ : SystemZRegWithSubregs<n, [low, high]> {
+ let HWEncoding = num;
+ let SubRegIndices = [subreg_l32, subreg_h32];
+}
+
+// 8 even-odd pairs of GPR64s.
+class GPR128<bits<16> num, string n, GPR64 low, GPR64 high>
+ : SystemZRegWithSubregs<n, [low, high]> {
+ let HWEncoding = num;
+ let SubRegIndices = [subreg_l64, subreg_h64];
+}
+
+// General-purpose registers
+foreach I = 0-15 in {
+ def R#I#L : GPR32<I, "r"#I>;
+ def R#I#H : GPR32<I, "r"#I>;
+ def R#I#D : GPR64<I, "r"#I, !cast<GPR32>("R"#I#"L"), !cast<GPR32>("R"#I#"H")>,
+ DwarfRegNum<[I]>;
+}
+
+foreach I = [0, 2, 4, 6, 8, 10, 12, 14] in {
+ def R#I#Q : GPR128<I, "r"#I, !cast<GPR64>("R"#!add(I, 1)#"D"),
+ !cast<GPR64>("R"#I#"D")>;
+}
+
+/// Allocate the callee-saved R6-R13 backwards. That way they can be saved
+/// together with R14 and R15 in one prolog instruction.
+defm GR32 : SystemZRegClass<"GR32", [i32], 32,
+ (add (sequence "R%uL", 0, 5),
+ (sequence "R%uL", 15, 6))>;
+defm GRH32 : SystemZRegClass<"GRH32", [i32], 32,
+ (add (sequence "R%uH", 0, 5),
+ (sequence "R%uH", 15, 6))>;
+defm GR64 : SystemZRegClass<"GR64", [i64], 64,
+ (add (sequence "R%uD", 0, 5),
+ (sequence "R%uD", 15, 6))>;
+
+// Combine the low and high GR32s into a single class. This can only be
+// used for virtual registers if the high-word facility is available.
+defm GRX32 : SystemZRegClass<"GRX32", [i32], 32,
+ (add (sequence "R%uL", 0, 5),
+ (sequence "R%uH", 0, 5),
+ R15L, R15H, R14L, R14H, R13L, R13H,
+ R12L, R12H, R11L, R11H, R10L, R10H,
+ R9L, R9H, R8L, R8H, R7L, R7H, R6L, R6H)>;
+
+// The architecture doesn't really have any i128 support, so model the
+// register pairs as untyped instead.
+defm GR128 : SystemZRegClass<"GR128", [untyped], 128,
+ (add R0Q, R2Q, R4Q, R12Q, R10Q, R8Q, R6Q, R14Q)>;
+
+// Base and index registers. Everything except R0, which in an address
+// context evaluates as 0.
+defm ADDR32 : SystemZRegClass<"ADDR32", [i32], 32, (sub GR32Bit, R0L)>;
+defm ADDR64 : SystemZRegClass<"ADDR64", [i64], 64, (sub GR64Bit, R0D)>;
+
+// Not used directly, but needs to exist for ADDR32 and ADDR64 subregs
+// of a GR128.
+defm ADDR128 : SystemZRegClass<"ADDR128", [untyped], 128, (sub GR128Bit, R0Q)>;
+
+// Any type register. Used for .insn directives when we don't know what the
+// register types could be.
+defm AnyReg : SystemZRegClass<"AnyReg",
+ [i64, f64, v8i8, v4i16, v2i32, v2f32], 64,
+ (add (sequence "R%uD", 0, 15),
+ (sequence "F%uD", 0, 15),
+ (sequence "V%u", 0, 15))>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point registers
+//===----------------------------------------------------------------------===//
+
+// Maps FPR register numbers to their DWARF encoding.
+class DwarfMapping<int id> { int Id = id; }
+
+def F0Dwarf : DwarfMapping<16>;
+def F2Dwarf : DwarfMapping<17>;
+def F4Dwarf : DwarfMapping<18>;
+def F6Dwarf : DwarfMapping<19>;
+
+def F1Dwarf : DwarfMapping<20>;
+def F3Dwarf : DwarfMapping<21>;
+def F5Dwarf : DwarfMapping<22>;
+def F7Dwarf : DwarfMapping<23>;
+
+def F8Dwarf : DwarfMapping<24>;
+def F10Dwarf : DwarfMapping<25>;
+def F12Dwarf : DwarfMapping<26>;
+def F14Dwarf : DwarfMapping<27>;
+
+def F9Dwarf : DwarfMapping<28>;
+def F11Dwarf : DwarfMapping<29>;
+def F13Dwarf : DwarfMapping<30>;
+def F15Dwarf : DwarfMapping<31>;
+
+def F16Dwarf : DwarfMapping<68>;
+def F18Dwarf : DwarfMapping<69>;
+def F20Dwarf : DwarfMapping<70>;
+def F22Dwarf : DwarfMapping<71>;
+
+def F17Dwarf : DwarfMapping<72>;
+def F19Dwarf : DwarfMapping<73>;
+def F21Dwarf : DwarfMapping<74>;
+def F23Dwarf : DwarfMapping<75>;
+
+def F24Dwarf : DwarfMapping<76>;
+def F26Dwarf : DwarfMapping<77>;
+def F28Dwarf : DwarfMapping<78>;
+def F30Dwarf : DwarfMapping<79>;
+
+def F25Dwarf : DwarfMapping<80>;
+def F27Dwarf : DwarfMapping<81>;
+def F29Dwarf : DwarfMapping<82>;
+def F31Dwarf : DwarfMapping<83>;
+
+// Upper 32 bits of one of the floating-point registers
+class FPR32<bits<16> num, string n> : SystemZReg<n> {
+ let HWEncoding = num;
+}
+
+// One of the floating-point registers.
+class FPR64<bits<16> num, string n, FPR32 high>
+ : SystemZRegWithSubregs<n, [high]> {
+ let HWEncoding = num;
+ let SubRegIndices = [subreg_r32];
+}
+
+// 8 pairs of FPR64s, with a one-register gap in between.
+class FPR128<bits<16> num, string n, FPR64 low, FPR64 high>
+ : SystemZRegWithSubregs<n, [low, high]> {
+ let HWEncoding = num;
+ let SubRegIndices = [subreg_l64, subreg_h64];
+}
+
+// Floating-point registers. Registers 16-31 require the vector facility.
+foreach I = 0-15 in {
+ def F#I#S : FPR32<I, "f"#I>;
+ def F#I#D : FPR64<I, "f"#I, !cast<FPR32>("F"#I#"S")>,
+ DwarfRegNum<[!cast<DwarfMapping>("F"#I#"Dwarf").Id]>;
+}
+foreach I = 16-31 in {
+ def F#I#S : FPR32<I, "v"#I>;
+ def F#I#D : FPR64<I, "v"#I, !cast<FPR32>("F"#I#"S")>,
+ DwarfRegNum<[!cast<DwarfMapping>("F"#I#"Dwarf").Id]>;
+}
+
+foreach I = [0, 1, 4, 5, 8, 9, 12, 13] in {
+ def F#I#Q : FPR128<I, "f"#I, !cast<FPR64>("F"#!add(I, 2)#"D"),
+ !cast<FPR64>("F"#I#"D")>;
+}
+
+// There's no store-multiple instruction for FPRs, so we're not fussy
+// about the order in which call-saved registers are allocated.
+defm FP32 : SystemZRegClass<"FP32", [f32], 32, (sequence "F%uS", 0, 15)>;
+defm FP64 : SystemZRegClass<"FP64", [f64], 64, (sequence "F%uD", 0, 15)>;
+defm FP128 : SystemZRegClass<"FP128", [f128], 128,
+ (add F0Q, F1Q, F4Q, F5Q, F8Q, F9Q, F12Q, F13Q)>;
+
+//===----------------------------------------------------------------------===//
+// Vector registers
+//===----------------------------------------------------------------------===//
+
+// A full 128-bit vector register, with an FPR64 as its high part.
+class VR128<bits<16> num, string n, FPR64 high>
+ : SystemZRegWithSubregs<n, [high]> {
+ let HWEncoding = num;
+ let SubRegIndices = [subreg_r64];
+}
+
+// Full vector registers.
+foreach I = 0-31 in {
+ def V#I : VR128<I, "v"#I, !cast<FPR64>("F"#I#"D")>,
+ DwarfRegNum<[!cast<DwarfMapping>("F"#I#"Dwarf").Id]>;
+}
+
+// Class used to store 32-bit values in the first element of a vector
+// register. f32 scalars are used for the WLEDB and WLDEB instructions.
+defm VR32 : SystemZRegClass<"VR32", [f32, v4i8, v2i16], 32,
+ (add (sequence "F%uS", 0, 7),
+ (sequence "F%uS", 16, 31),
+ (sequence "F%uS", 8, 15))>;
+
+// Class used to store 64-bit values in the upper half of a vector register.
+// The vector facility also includes scalar f64 instructions that operate
+// on the full vector register set.
+defm VR64 : SystemZRegClass<"VR64", [f64, v8i8, v4i16, v2i32, v2f32], 64,
+ (add (sequence "F%uD", 0, 7),
+ (sequence "F%uD", 16, 31),
+ (sequence "F%uD", 8, 15))>;
+
+// The subset of vector registers that can be used for floating-point
+// operations too.
+defm VF128 : SystemZRegClass<"VF128",
+ [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
+ (sequence "V%u", 0, 15)>;
+
+// All vector registers.
+defm VR128 : SystemZRegClass<"VR128",
+ [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
+ (add (sequence "V%u", 0, 7),
+ (sequence "V%u", 16, 31),
+ (sequence "V%u", 8, 15))>;
+
+// Attaches a ValueType to a register operand, to make the instruction
+// definitions easier.
+class TypedReg<ValueType vtin, RegisterOperand opin> {
+ ValueType vt = vtin;
+ RegisterOperand op = opin;
+}
+
+def v32eb : TypedReg<f32, VR32>;
+def v64g : TypedReg<i64, VR64>;
+def v64db : TypedReg<f64, VR64>;
+def v128b : TypedReg<v16i8, VR128>;
+def v128h : TypedReg<v8i16, VR128>;
+def v128f : TypedReg<v4i32, VR128>;
+def v128g : TypedReg<v2i64, VR128>;
+def v128q : TypedReg<v16i8, VR128>;
+def v128eb : TypedReg<v4f32, VR128>;
+def v128db : TypedReg<v2f64, VR128>;
+def v128any : TypedReg<untyped, VR128>;
+
+//===----------------------------------------------------------------------===//
+// Other registers
+//===----------------------------------------------------------------------===//
+
+// The 2-bit condition code field of the PSW. Every register named in an
+// inline asm needs a class associated with it.
+def CC : SystemZReg<"cc">;
+let isAllocatable = 0 in
+ def CCRegs : RegisterClass<"SystemZ", [i32], 32, (add CC)>;
+
+// Access registers.
+class ACR32<bits<16> num, string n> : SystemZReg<n> {
+ let HWEncoding = num;
+}
+foreach I = 0-15 in {
+ def A#I : ACR32<I, "a"#I>, DwarfRegNum<[!add(I, 48)]>;
+}
+defm AR32 : SystemZRegClass<"AR32", [i32], 32,
+ (add (sequence "A%u", 0, 15)), 0>;
+
+// Control registers.
+class CREG64<bits<16> num, string n> : SystemZReg<n> {
+ let HWEncoding = num;
+}
+foreach I = 0-15 in {
+ def C#I : CREG64<I, "c"#I>, DwarfRegNum<[!add(I, 32)]>;
+}
+defm CR64 : SystemZRegClass<"CR64", [i64], 64,
+ (add (sequence "C%u", 0, 15)), 0>;
+
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSchedule.td b/contrib/llvm/lib/Target/SystemZ/SystemZSchedule.td
new file mode 100644
index 000000000000..1ce0168f95e9
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSchedule.td
@@ -0,0 +1,81 @@
+//==-- SystemZSchedule.td - SystemZ Scheduling Definitions ----*- tblgen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Scheduler resources
+// Resources ending with a '2' use that resource for 2 cycles. An instruction
+// using two such resources uses the mapped unit for 4 cycles, and 2 is added
+// to the total number of uops of the sched class.
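+//
+// For example, the Z13 model below maps FXa2 onto two copies of the
+// FXa unit:
+//   def : WriteRes<FXa2, [Z13_FXaUnit, Z13_FXaUnit]> { let Latency = 2; }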
+
+// These three resources are used to express decoder grouping rules.
+// The number of decoder slots needed by an instruction is normally
+// one. For a cracked instruction (BeginGroup && !EndGroup) it is
+// two. Expanded instructions (BeginGroup && EndGroup) group alone.
+def GroupAlone : SchedWrite;
+def BeginGroup : SchedWrite;
+def EndGroup : SchedWrite;
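+
+// For example (hypothetical opcodes, shown only to illustrate the
+// convention used throughout this file):
+//   def : InstRW<[FXa, FXa, BeginGroup], (instregex "SomeCrackedOp$")>;
+//   def : InstRW<[FXa, FXa, FXb, GroupAlone], (instregex "SomeExpandedOp$")>;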
+
+// Latencies, to make the code a bit neater. If more than one resource
+// is used for an instruction, Tablegen outputs the greatest latency
+// (not the sum). Therefore, in such cases one of these latency
+// resources is needed to state the total latency explicitly.
+def Lat2 : SchedWrite;
+def Lat3 : SchedWrite;
+def Lat4 : SchedWrite;
+def Lat5 : SchedWrite;
+def Lat6 : SchedWrite;
+def Lat7 : SchedWrite;
+def Lat8 : SchedWrite;
+def Lat9 : SchedWrite;
+def Lat10 : SchedWrite;
+def Lat11 : SchedWrite;
+def Lat12 : SchedWrite;
+def Lat15 : SchedWrite;
+def Lat20 : SchedWrite;
+def Lat30 : SchedWrite;
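+
+// For example, a class defined with [FXa, LSU, Lat5] gets a total
+// latency of 5 cycles; without the Lat5 entry, only the greatest of
+// the FXa and LSU latencies would be used.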
+
+// Fixed-point
+def FXa : SchedWrite;
+def FXa2 : SchedWrite;
+def FXb : SchedWrite;
+def FXU : SchedWrite;
+
+// Load/store unit
+def LSU : SchedWrite;
+
+// Model a return without latency; otherwise the if-converter would
+// model extra cost and abort (currently there is an assert that checks
+// that all instructions have at least one uop).
+def LSU_lat1 : SchedWrite;
+
+// Floating point unit (zEC12 and earlier)
+def FPU : SchedWrite;
+def FPU2 : SchedWrite;
+def DFU : SchedWrite;
+def DFU2 : SchedWrite;
+
+// Vector sub units (z13)
+def VecBF : SchedWrite;
+def VecBF2 : SchedWrite;
+def VecDF : SchedWrite;
+def VecDF2 : SchedWrite;
+def VecDFX : SchedWrite;
+def VecDFX2 : SchedWrite;
+def VecFPd : SchedWrite; // Blocking BFP div/sqrt unit.
+def VecMul : SchedWrite;
+def VecStr : SchedWrite;
+def VecXsPm : SchedWrite;
+
+// Virtual branching unit
+def VBU : SchedWrite;
+
+
+include "SystemZScheduleZ13.td"
+include "SystemZScheduleZEC12.td"
+include "SystemZScheduleZ196.td"
+
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
new file mode 100644
index 000000000000..adc9f2976f87
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
@@ -0,0 +1,1503 @@
+//-- SystemZScheduleZ13.td - SystemZ Scheduling Definitions ----*- tblgen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for Z13 to support instruction
+// scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+def Z13Model : SchedMachineModel {
+
+ let UnsupportedFeatures = Arch11UnsupportedFeatures.List;
+
+ let IssueWidth = 8;
+ let MicroOpBufferSize = 60; // Issue queues
+ let LoadLatency = 1; // Optimistic load latency.
+
+ let PostRAScheduler = 1;
+
+ // Extra cycles for a mispredicted branch.
+ let MispredictPenalty = 20;
+}
+
+let SchedModel = Z13Model in {
+
+// These definitions could be put in a subtarget common include file,
+// but it seems the include system in Tablegen currently rejects
+// multiple includes of the same file.
+def : WriteRes<GroupAlone, []> {
+ let NumMicroOps = 0;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
+def : WriteRes<BeginGroup, []> {
+ let NumMicroOps = 0;
+ let BeginGroup = 1;
+}
+def : WriteRes<EndGroup, []> {
+ let NumMicroOps = 0;
+ let EndGroup = 1;
+}
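+// The three writes above add no micro-ops themselves (NumMicroOps = 0);
+// they only set the decoder-group flags of the sched class they are
+// combined into.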
+def : WriteRes<Lat2, []> { let Latency = 2; let NumMicroOps = 0;}
+def : WriteRes<Lat3, []> { let Latency = 3; let NumMicroOps = 0;}
+def : WriteRes<Lat4, []> { let Latency = 4; let NumMicroOps = 0;}
+def : WriteRes<Lat5, []> { let Latency = 5; let NumMicroOps = 0;}
+def : WriteRes<Lat6, []> { let Latency = 6; let NumMicroOps = 0;}
+def : WriteRes<Lat7, []> { let Latency = 7; let NumMicroOps = 0;}
+def : WriteRes<Lat8, []> { let Latency = 8; let NumMicroOps = 0;}
+def : WriteRes<Lat9, []> { let Latency = 9; let NumMicroOps = 0;}
+def : WriteRes<Lat10, []> { let Latency = 10; let NumMicroOps = 0;}
+def : WriteRes<Lat11, []> { let Latency = 11; let NumMicroOps = 0;}
+def : WriteRes<Lat12, []> { let Latency = 12; let NumMicroOps = 0;}
+def : WriteRes<Lat15, []> { let Latency = 15; let NumMicroOps = 0;}
+def : WriteRes<Lat20, []> { let Latency = 20; let NumMicroOps = 0;}
+def : WriteRes<Lat30, []> { let Latency = 30; let NumMicroOps = 0;}
+
+// Execution units.
+def Z13_FXaUnit : ProcResource<2>;
+def Z13_FXbUnit : ProcResource<2>;
+def Z13_LSUnit : ProcResource<2>;
+def Z13_VecUnit : ProcResource<2>;
+def Z13_VecFPdUnit : ProcResource<2> { let BufferSize = 1; /* blocking */ }
+def Z13_VBUnit : ProcResource<2>;
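+
+// Each unit is modeled as two copies (ProcResource<2>), so two
+// instructions per cycle can use it. The BufferSize of 1 makes
+// Z13_VecFPdUnit blocking, matching the non-pipelined FP divide /
+// square-root hardware.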
+
+// Subtarget specific definitions of scheduling resources.
+def : WriteRes<FXa, [Z13_FXaUnit]> { let Latency = 1; }
+def : WriteRes<FXa2, [Z13_FXaUnit, Z13_FXaUnit]> { let Latency = 2; }
+def : WriteRes<FXb, [Z13_FXbUnit]> { let Latency = 1; }
+def : WriteRes<LSU, [Z13_LSUnit]> { let Latency = 4; }
+def : WriteRes<VecBF, [Z13_VecUnit]> { let Latency = 8; }
+def : WriteRes<VecBF2, [Z13_VecUnit, Z13_VecUnit]> { let Latency = 9; }
+def : WriteRes<VecDF, [Z13_VecUnit]> { let Latency = 8; }
+def : WriteRes<VecDF2, [Z13_VecUnit, Z13_VecUnit]> { let Latency = 9; }
+def : WriteRes<VecDFX, [Z13_VecUnit]> { let Latency = 1; }
+def : WriteRes<VecDFX2, [Z13_VecUnit, Z13_VecUnit]> { let Latency = 2; }
+def : WriteRes<VecFPd, [Z13_VecFPdUnit, Z13_VecFPdUnit, Z13_VecFPdUnit,
+ Z13_VecFPdUnit, Z13_VecFPdUnit, Z13_VecFPdUnit,
+ Z13_VecFPdUnit, Z13_VecFPdUnit, Z13_VecFPdUnit,
+ Z13_VecFPdUnit, Z13_VecFPdUnit, Z13_VecFPdUnit,
+ Z13_VecFPdUnit, Z13_VecFPdUnit, Z13_VecFPdUnit,
+ Z13_VecFPdUnit, Z13_VecFPdUnit, Z13_VecFPdUnit,
+ Z13_VecFPdUnit, Z13_VecFPdUnit, Z13_VecFPdUnit,
+ Z13_VecFPdUnit, Z13_VecFPdUnit, Z13_VecFPdUnit,
+ Z13_VecFPdUnit, Z13_VecFPdUnit, Z13_VecFPdUnit,
+ Z13_VecFPdUnit, Z13_VecFPdUnit, Z13_VecFPdUnit]>
+ { let Latency = 30; }
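+// VecFPd lists the unit once per cycle of occupancy, so a divide or
+// square root keeps one copy of the blocking unit busy for the full
+// 30-cycle latency.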
+def : WriteRes<VecMul, [Z13_VecUnit]> { let Latency = 5; }
+def : WriteRes<VecStr, [Z13_VecUnit]> { let Latency = 4; }
+def : WriteRes<VecXsPm, [Z13_VecUnit]> { let Latency = 3; }
+def : WriteRes<VBU, [Z13_VBUnit]>; // Virtual Branching Unit
+
+// -------------------------- INSTRUCTIONS ---------------------------------- //
+
+// InstRW constructs have been used in order to preserve the
+// readability of the InstrInfo files.
+
+// For each instruction, as matched by a regexp, provide a list of
+// resources that it needs. These will be combined into a SchedClass.
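+//
+// For example, the single rule in the addition section below
+//   def : InstRW<[FXa, LSU, Lat5], (instregex "A(Y)?$")>;
+// covers both the A and the AY opcode.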
+
+//===----------------------------------------------------------------------===//
+// Stack allocation
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa], (instregex "ADJDYNALLOC$")>; // Pseudo -> LA / LAY
+
+//===----------------------------------------------------------------------===//
+// Branch instructions
+//===----------------------------------------------------------------------===//
+
+// Branch
+def : InstRW<[VBU], (instregex "(Call)?BRC(L)?(Asm.*)?$")>;
+def : InstRW<[VBU], (instregex "(Call)?J(G)?(Asm.*)?$")>;
+def : InstRW<[FXb], (instregex "(Call)?BC(R)?(Asm.*)?$")>;
+def : InstRW<[FXb], (instregex "(Call)?B(R)?(Asm.*)?$")>;
+def : InstRW<[FXa, EndGroup], (instregex "BRCT(G)?$")>;
+def : InstRW<[FXb, FXa, Lat2, GroupAlone], (instregex "BRCTH$")>;
+def : InstRW<[FXb, FXa, Lat2, GroupAlone], (instregex "BCT(G)?(R)?$")>;
+def : InstRW<[FXa, FXa, FXb, FXb, Lat4, GroupAlone],
+ (instregex "B(R)?X(H|L).*$")>;
+
+// Compare and branch
+def : InstRW<[FXb], (instregex "C(L)?(G)?(I|R)J(Asm.*)?$")>;
+def : InstRW<[FXb, FXb, Lat2, GroupAlone],
+ (instregex "C(L)?(G)?(I|R)B(Call|Return|Asm.*)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Trap instructions
+//===----------------------------------------------------------------------===//
+
+// Trap
+def : InstRW<[VBU], (instregex "(Cond)?Trap$")>;
+
+// Compare and trap
+def : InstRW<[FXb], (instregex "C(G)?(I|R)T(Asm.*)?$")>;
+def : InstRW<[FXb], (instregex "CL(G)?RT(Asm.*)?$")>;
+def : InstRW<[FXb], (instregex "CL(F|G)IT(Asm.*)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "CL(G)?T(Asm.*)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Call and return instructions
+//===----------------------------------------------------------------------===//
+
+// Call
+def : InstRW<[VBU, FXa, FXa, Lat3, GroupAlone], (instregex "(Call)?BRAS$")>;
+def : InstRW<[FXa, FXa, FXb, Lat3, GroupAlone], (instregex "(Call)?BRASL$")>;
+def : InstRW<[FXa, FXa, FXb, Lat3, GroupAlone], (instregex "(Call)?BAS(R)?$")>;
+def : InstRW<[FXa, FXa, FXb, Lat3, GroupAlone], (instregex "TLS_(G|L)DCALL$")>;
+
+// Return
+def : InstRW<[FXb, EndGroup], (instregex "Return$")>;
+def : InstRW<[FXb], (instregex "CondReturn$")>;
+
+//===----------------------------------------------------------------------===//
+// Select instructions
+//===----------------------------------------------------------------------===//
+
+// Select pseudo
+def : InstRW<[FXa], (instregex "Select(32|64|32Mux)$")>;
+
+// CondStore pseudos
+def : InstRW<[FXa], (instregex "CondStore16(Inv)?$")>;
+def : InstRW<[FXa], (instregex "CondStore16Mux(Inv)?$")>;
+def : InstRW<[FXa], (instregex "CondStore32(Inv)?$")>;
+def : InstRW<[FXa], (instregex "CondStore32Mux(Inv)?$")>;
+def : InstRW<[FXa], (instregex "CondStore64(Inv)?$")>;
+def : InstRW<[FXa], (instregex "CondStore8(Inv)?$")>;
+def : InstRW<[FXa], (instregex "CondStore8Mux(Inv)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Move instructions
+//===----------------------------------------------------------------------===//
+
+// Moves
+def : InstRW<[FXb, LSU, Lat5], (instregex "MV(G|H)?HI$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "MVI(Y)?$")>;
+
+// Move character
+def : InstRW<[FXb, LSU, LSU, LSU, Lat8, GroupAlone], (instregex "MVC$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVCL(E|U)?$")>;
+
+// Pseudo -> reg move
+def : InstRW<[FXa], (instregex "COPY(_TO_REGCLASS)?$")>;
+def : InstRW<[FXa], (instregex "EXTRACT_SUBREG$")>;
+def : InstRW<[FXa], (instregex "INSERT_SUBREG$")>;
+def : InstRW<[FXa], (instregex "REG_SEQUENCE$")>;
+def : InstRW<[FXa], (instregex "SUBREG_TO_REG$")>;
+
+// Loads
+def : InstRW<[LSU], (instregex "L(Y|FH|RL|Mux|CBB)?$")>;
+def : InstRW<[LSU], (instregex "LG(RL)?$")>;
+def : InstRW<[LSU], (instregex "L128$")>;
+
+def : InstRW<[FXa], (instregex "LLIH(F|H|L)$")>;
+def : InstRW<[FXa], (instregex "LLIL(F|H|L)$")>;
+
+def : InstRW<[FXa], (instregex "LG(F|H)I$")>;
+def : InstRW<[FXa], (instregex "LHI(Mux)?$")>;
+def : InstRW<[FXa], (instregex "LR(Mux)?$")>;
+
+// Load and zero rightmost byte
+def : InstRW<[LSU], (instregex "LZR(F|G)$")>;
+
+// Load and trap
+def : InstRW<[FXb, LSU, Lat5], (instregex "L(FH|G)?AT$")>;
+
+// Load and test
+def : InstRW<[FXa, LSU, Lat5], (instregex "LT(G)?$")>;
+def : InstRW<[FXa], (instregex "LT(G)?R$")>;
+
+// Stores
+def : InstRW<[FXb, LSU, Lat5], (instregex "STG(RL)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "ST128$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "ST(Y|FH|RL|Mux)?$")>;
+
+// String moves.
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVST$")>;
+
+//===----------------------------------------------------------------------===//
+// Conditional move instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa, Lat2], (instregex "LOCRMux$")>;
+def : InstRW<[FXa, Lat2], (instregex "LOC(G|FH)?R(Asm.*)?$")>;
+def : InstRW<[FXa, Lat2], (instregex "LOC(G|H)?HI(Asm.*)?$")>;
+def : InstRW<[FXa, LSU, Lat6], (instregex "LOC(G|FH|Mux)?(Asm.*)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "STOC(G|FH|Mux)?(Asm.*)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Sign extensions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa], (instregex "L(B|H|G)R$")>;
+def : InstRW<[FXa], (instregex "LG(B|H|F)R$")>;
+
+def : InstRW<[FXa, LSU, Lat5], (instregex "LTGF$")>;
+def : InstRW<[FXa], (instregex "LTGFR$")>;
+
+def : InstRW<[FXa, LSU, Lat5], (instregex "LB(H|Mux)?$")>;
+def : InstRW<[FXa, LSU, Lat5], (instregex "LH(Y)?$")>;
+def : InstRW<[FXa, LSU, Lat5], (instregex "LH(H|Mux|RL)$")>;
+def : InstRW<[FXa, LSU, Lat5], (instregex "LG(B|H|F)$")>;
+def : InstRW<[FXa, LSU, Lat5], (instregex "LG(H|F)RL$")>;
+
+//===----------------------------------------------------------------------===//
+// Zero extensions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa], (instregex "LLCR(Mux)?$")>;
+def : InstRW<[FXa], (instregex "LLHR(Mux)?$")>;
+def : InstRW<[FXa], (instregex "LLG(C|H|F|T)R$")>;
+def : InstRW<[LSU], (instregex "LLC(Mux)?$")>;
+def : InstRW<[LSU], (instregex "LLH(Mux)?$")>;
+def : InstRW<[FXa, LSU, Lat5], (instregex "LL(C|H)H$")>;
+def : InstRW<[LSU], (instregex "LLHRL$")>;
+def : InstRW<[LSU], (instregex "LLG(C|H|F|T|HRL|FRL)$")>;
+
+// Load and zero rightmost byte
+def : InstRW<[LSU], (instregex "LLZRGF$")>;
+
+// Load and trap
+def : InstRW<[FXb, LSU, Lat5], (instregex "LLG(F|T)?AT$")>;
+
+//===----------------------------------------------------------------------===//
+// Truncations
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, LSU, Lat5], (instregex "STC(H|Y|Mux)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "STH(H|Y|RL|Mux)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "STCM(H|Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Multi-register moves
+//===----------------------------------------------------------------------===//
+
+// Load multiple (estimated average of 5 ops)
+def : InstRW<[LSU, LSU, LSU, LSU, LSU, Lat10, GroupAlone],
+ (instregex "LM(H|Y|G)?$")>;
+
+// Load multiple disjoint
+def : InstRW<[FXb, Lat30, GroupAlone], (instregex "LMD$")>;
+
+// Store multiple (estimated average of ceil(5/2) FXb ops)
+def : InstRW<[LSU, LSU, FXb, FXb, FXb, Lat10,
+ GroupAlone], (instregex "STM(G|H|Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Byte swaps
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa], (instregex "LRV(G)?R$")>;
+def : InstRW<[FXa, LSU, Lat5], (instregex "LRV(G|H)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "STRV(G|H)?$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVCIN$")>;
+
+//===----------------------------------------------------------------------===//
+// Load address instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa], (instregex "LA(Y|RL)?$")>;
+
+// Load the Global Offset Table address ( -> larl )
+def : InstRW<[FXa], (instregex "GOT$")>;
+
+//===----------------------------------------------------------------------===//
+// Absolute and Negation
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa, Lat2], (instregex "LP(G)?R$")>;
+def : InstRW<[FXa, FXa, Lat3, BeginGroup], (instregex "L(N|P)GFR$")>;
+def : InstRW<[FXa, Lat2], (instregex "LN(R|GR)$")>;
+def : InstRW<[FXa], (instregex "LC(R|GR)$")>;
+def : InstRW<[FXa, FXa, Lat2, BeginGroup], (instregex "LCGFR$")>;
+
+//===----------------------------------------------------------------------===//
+// Insertion
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa, LSU, Lat5], (instregex "IC(Y)?$")>;
+def : InstRW<[FXa, LSU, Lat5], (instregex "IC32(Y)?$")>;
+def : InstRW<[FXa, LSU, Lat5], (instregex "ICM(H|Y)?$")>;
+def : InstRW<[FXa], (instregex "II(F|H|L)Mux$")>;
+def : InstRW<[FXa], (instregex "IIHF(64)?$")>;
+def : InstRW<[FXa], (instregex "IIHH(64)?$")>;
+def : InstRW<[FXa], (instregex "IIHL(64)?$")>;
+def : InstRW<[FXa], (instregex "IILF(64)?$")>;
+def : InstRW<[FXa], (instregex "IILH(64)?$")>;
+def : InstRW<[FXa], (instregex "IILL(64)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Addition
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa, LSU, Lat5], (instregex "A(Y)?$")>;
+def : InstRW<[FXa, LSU, Lat6], (instregex "AH(Y)?$")>;
+def : InstRW<[FXa], (instregex "AIH$")>;
+def : InstRW<[FXa], (instregex "AFI(Mux)?$")>;
+def : InstRW<[FXa, LSU, Lat5], (instregex "AG$")>;
+def : InstRW<[FXa], (instregex "AGFI$")>;
+def : InstRW<[FXa], (instregex "AGHI(K)?$")>;
+def : InstRW<[FXa], (instregex "AGR(K)?$")>;
+def : InstRW<[FXa], (instregex "AHI(K)?$")>;
+def : InstRW<[FXa], (instregex "AHIMux(K)?$")>;
+def : InstRW<[FXa, LSU, Lat5], (instregex "AL(Y)?$")>;
+def : InstRW<[FXa], (instregex "AL(FI|HSIK)$")>;
+def : InstRW<[FXa, LSU, Lat5], (instregex "ALG(F)?$")>;
+def : InstRW<[FXa], (instregex "ALGHSIK$")>;
+def : InstRW<[FXa], (instregex "ALGF(I|R)$")>;
+def : InstRW<[FXa], (instregex "ALGR(K)?$")>;
+def : InstRW<[FXa], (instregex "ALR(K)?$")>;
+def : InstRW<[FXa], (instregex "AR(K)?$")>;
+def : InstRW<[FXa], (instregex "A(L)?HHHR$")>;
+def : InstRW<[FXa, Lat2], (instregex "A(L)?HHLR$")>;
+def : InstRW<[FXa], (instregex "ALSIH(N)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "A(L)?(G)?SI$")>;
+
+// Logical addition with carry
+def : InstRW<[FXa, LSU, Lat6, GroupAlone], (instregex "ALC(G)?$")>;
+def : InstRW<[FXa, Lat2, GroupAlone], (instregex "ALC(G)?R$")>;
+
+// Add with sign extension (32 -> 64)
+def : InstRW<[FXa, LSU, Lat6], (instregex "AGF$")>;
+def : InstRW<[FXa, Lat2], (instregex "AGFR$")>;
+
+//===----------------------------------------------------------------------===//
+// Subtraction
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa, LSU, Lat5], (instregex "S(G|Y)?$")>;
+def : InstRW<[FXa, LSU, Lat6], (instregex "SH(Y)?$")>;
+def : InstRW<[FXa], (instregex "SGR(K)?$")>;
+def : InstRW<[FXa], (instregex "SLFI$")>;
+def : InstRW<[FXa, LSU, Lat5], (instregex "SL(G|GF|Y)?$")>;
+def : InstRW<[FXa], (instregex "SLGF(I|R)$")>;
+def : InstRW<[FXa], (instregex "SLGR(K)?$")>;
+def : InstRW<[FXa], (instregex "SLR(K)?$")>;
+def : InstRW<[FXa], (instregex "SR(K)?$")>;
+def : InstRW<[FXa], (instregex "S(L)?HHHR$")>;
+def : InstRW<[FXa, Lat2], (instregex "S(L)?HHLR$")>;
+
+// Subtraction with borrow
+def : InstRW<[FXa, LSU, Lat6, GroupAlone], (instregex "SLB(G)?$")>;
+def : InstRW<[FXa, Lat2, GroupAlone], (instregex "SLB(G)?R$")>;
+
+// Subtraction with sign extension (32 -> 64)
+def : InstRW<[FXa, LSU, Lat6], (instregex "SGF$")>;
+def : InstRW<[FXa, Lat2], (instregex "SGFR$")>;
+
+//===----------------------------------------------------------------------===//
+// AND
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa, LSU, Lat5], (instregex "N(G|Y)?$")>;
+def : InstRW<[FXa], (instregex "NGR(K)?$")>;
+def : InstRW<[FXa], (instregex "NI(FMux|HMux|LMux)$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "NI(Y)?$")>;
+def : InstRW<[FXa], (instregex "NIHF(64)?$")>;
+def : InstRW<[FXa], (instregex "NIHH(64)?$")>;
+def : InstRW<[FXa], (instregex "NIHL(64)?$")>;
+def : InstRW<[FXa], (instregex "NILF(64)?$")>;
+def : InstRW<[FXa], (instregex "NILH(64)?$")>;
+def : InstRW<[FXa], (instregex "NILL(64)?$")>;
+def : InstRW<[FXa], (instregex "NR(K)?$")>;
+def : InstRW<[LSU, LSU, FXb, Lat9, BeginGroup], (instregex "NC$")>;
+
+//===----------------------------------------------------------------------===//
+// OR
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa, LSU, Lat5], (instregex "O(G|Y)?$")>;
+def : InstRW<[FXa], (instregex "OGR(K)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "OI(Y)?$")>;
+def : InstRW<[FXa], (instregex "OI(FMux|HMux|LMux)$")>;
+def : InstRW<[FXa], (instregex "OIHF(64)?$")>;
+def : InstRW<[FXa], (instregex "OIHH(64)?$")>;
+def : InstRW<[FXa], (instregex "OIHL(64)?$")>;
+def : InstRW<[FXa], (instregex "OILF(64)?$")>;
+def : InstRW<[FXa], (instregex "OILH(64)?$")>;
+def : InstRW<[FXa], (instregex "OILL(64)?$")>;
+def : InstRW<[FXa], (instregex "OR(K)?$")>;
+def : InstRW<[LSU, LSU, FXb, Lat9, BeginGroup], (instregex "OC$")>;
+
+//===----------------------------------------------------------------------===//
+// XOR
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa, LSU, Lat5], (instregex "X(G|Y)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "XI(Y)?$")>;
+def : InstRW<[FXa], (instregex "XIFMux$")>;
+def : InstRW<[FXa], (instregex "XGR(K)?$")>;
+def : InstRW<[FXa], (instregex "XIHF(64)?$")>;
+def : InstRW<[FXa], (instregex "XILF(64)?$")>;
+def : InstRW<[FXa], (instregex "XR(K)?$")>;
+def : InstRW<[LSU, LSU, FXb, Lat9, BeginGroup], (instregex "XC$")>;
+
+//===----------------------------------------------------------------------===//
+// Multiplication
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa, LSU, Lat10], (instregex "MS(GF|Y)?$")>;
+def : InstRW<[FXa, Lat6], (instregex "MS(R|FI)$")>;
+def : InstRW<[FXa, LSU, Lat12], (instregex "MSG$")>;
+def : InstRW<[FXa, Lat8], (instregex "MSGR$")>;
+def : InstRW<[FXa, Lat6], (instregex "MSGF(I|R)$")>;
+def : InstRW<[FXa, LSU, Lat15, GroupAlone], (instregex "MLG$")>;
+def : InstRW<[FXa, Lat9, GroupAlone], (instregex "MLGR$")>;
+def : InstRW<[FXa, Lat5], (instregex "MGHI$")>;
+def : InstRW<[FXa, Lat5], (instregex "MHI$")>;
+def : InstRW<[FXa, LSU, Lat9], (instregex "MH(Y)?$")>;
+def : InstRW<[FXa, Lat7, GroupAlone], (instregex "M(L)?R$")>;
+def : InstRW<[FXa, LSU, Lat7, GroupAlone], (instregex "M(FY|L)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Division and remainder
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa2, FXa2, Lat20, GroupAlone], (instregex "DR$")>;
+def : InstRW<[FXa2, FXa2, LSU, Lat30, GroupAlone], (instregex "D$")>;
+def : InstRW<[FXa, Lat30, GroupAlone], (instregex "DSG(F)?R$")>;
+def : InstRW<[LSU, FXa, Lat30, GroupAlone], (instregex "DSG(F)?$")>;
+def : InstRW<[FXa2, FXa2, Lat20, GroupAlone], (instregex "DLR$")>;
+def : InstRW<[FXa2, FXa2, Lat30, GroupAlone], (instregex "DLGR$")>;
+def : InstRW<[FXa2, FXa2, LSU, Lat30, GroupAlone], (instregex "DL(G)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Shifts
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa], (instregex "SLL(G|K)?$")>;
+def : InstRW<[FXa], (instregex "SRL(G|K)?$")>;
+def : InstRW<[FXa], (instregex "SRA(G|K)?$")>;
+def : InstRW<[FXa], (instregex "SLA(G|K)?$")>;
+def : InstRW<[FXa, FXa, FXa, FXa, Lat8], (instregex "S(L|R)D(A|L)$")>;
+
+// Rotate
+def : InstRW<[FXa, LSU, Lat6], (instregex "RLL(G)?$")>;
+
+// Rotate and insert
+def : InstRW<[FXa], (instregex "RISBG(N|32)?$")>;
+def : InstRW<[FXa], (instregex "RISBH(G|H|L)$")>;
+def : InstRW<[FXa], (instregex "RISBL(G|H|L)$")>;
+def : InstRW<[FXa], (instregex "RISBMux$")>;
+
+// Rotate and Select
+def : InstRW<[FXa, FXa, Lat3, BeginGroup], (instregex "R(N|O|X)SBG$")>;
+
+//===----------------------------------------------------------------------===//
+// Comparison
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, LSU, Lat5], (instregex "C(G|Y|Mux|RL)?$")>;
+def : InstRW<[FXb], (instregex "C(F|H)I(Mux)?$")>;
+def : InstRW<[FXb], (instregex "CG(F|H)I$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "CG(HSI|RL)$")>;
+def : InstRW<[FXb], (instregex "C(G)?R$")>;
+def : InstRW<[FXb], (instregex "CIH$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "CH(F|SI)$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "CL(Y|Mux|FHSI)?$")>;
+def : InstRW<[FXb], (instregex "CLFI(Mux)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "CLG(HRL|HSI)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "CLGF(RL)?$")>;
+def : InstRW<[FXb], (instregex "CLGF(I|R)$")>;
+def : InstRW<[FXb], (instregex "CLGR$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "CLGRL$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "CLH(F|RL|HSI)$")>;
+def : InstRW<[FXb], (instregex "CLIH$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "CLI(Y)?$")>;
+def : InstRW<[FXb], (instregex "CLR$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "CLRL$")>;
+def : InstRW<[FXb], (instregex "C(L)?HHR$")>;
+def : InstRW<[FXb, Lat2], (instregex "C(L)?HLR$")>;
+
+// Compare halfword
+def : InstRW<[FXb, LSU, Lat6], (instregex "CH(Y|RL)?$")>;
+def : InstRW<[FXb, LSU, Lat6], (instregex "CGH(RL)?$")>;
+def : InstRW<[FXa, FXb, LSU, Lat6, BeginGroup], (instregex "CHHSI$")>;
+
+// Compare with sign extension (32 -> 64)
+def : InstRW<[FXb, LSU, Lat6], (instregex "CGF(RL)?$")>;
+def : InstRW<[FXb, Lat2], (instregex "CGFR$")>;
+
+// Compare logical character
+def : InstRW<[FXb, LSU, LSU, Lat9, BeginGroup], (instregex "CLC$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CLCL(E|U)?$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CLST$")>;
+
+// Test under mask
+def : InstRW<[FXb, LSU, Lat5], (instregex "TM(Y)?$")>;
+def : InstRW<[FXb], (instregex "TM(H|L)Mux$")>;
+def : InstRW<[FXb], (instregex "TMHH(64)?$")>;
+def : InstRW<[FXb], (instregex "TMHL(64)?$")>;
+def : InstRW<[FXb], (instregex "TMLH(64)?$")>;
+def : InstRW<[FXb], (instregex "TMLL(64)?$")>;
+
+// Compare logical characters under mask
+def : InstRW<[FXb, LSU, Lat5], (instregex "CLM(H|Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Prefetch and execution hint
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSU], (instregex "PFD(RL)?$")>;
+def : InstRW<[FXb, Lat2], (instregex "BPP$")>;
+def : InstRW<[FXb, EndGroup], (instregex "BPRP$")>;
+def : InstRW<[FXb], (instregex "NIAI$")>;
+
+//===----------------------------------------------------------------------===//
+// Atomic operations
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, EndGroup], (instregex "Serialize$")>;
+
+def : InstRW<[FXb, LSU, Lat5], (instregex "LAA(G)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "LAAL(G)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "LAN(G)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "LAO(G)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "LAX(G)?$")>;
+
+// Test and set
+def : InstRW<[FXb, LSU, Lat5, EndGroup], (instregex "TS$")>;
+
+// Compare and swap
+def : InstRW<[FXa, FXb, LSU, Lat6, GroupAlone], (instregex "CS(G|Y)?$")>;
+
+// Compare double and swap
+def : InstRW<[FXa, FXa, FXb, FXb, FXa, LSU, Lat10, GroupAlone],
+ (instregex "CDS(Y)?$")>;
+def : InstRW<[FXa, FXa, FXb, FXb, LSU, FXb, FXb, LSU, LSU, Lat20, GroupAlone],
+ (instregex "CDSG$")>;
+
+// Compare and swap and store
+def : InstRW<[FXa, Lat30, GroupAlone], (instregex "CSST$")>;
+
+// Perform locked operation
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "PLO$")>;
+
+// Load/store pair from/to quadword
+def : InstRW<[LSU, LSU, Lat5, GroupAlone], (instregex "LPQ$")>;
+def : InstRW<[FXb, FXb, LSU, Lat6, GroupAlone], (instregex "STPQ$")>;
+
+// Load pair disjoint
+def : InstRW<[LSU, LSU, Lat5, GroupAlone], (instregex "LPD(G)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Translate and convert
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa, Lat30, GroupAlone], (instregex "TR(T|TR)?(E|EOpt)?$")>;
+def : InstRW<[FXa, Lat30, GroupAlone], (instregex "TR(T|O)(T|O)(Opt)?$")>;
+def : InstRW<[FXa, Lat30, GroupAlone], (instregex "CU(12|14|21|24|41|42)(Opt)?$")>;
+def : InstRW<[FXa, Lat30, GroupAlone], (instregex "(CUUTF|CUTFU)(Opt)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Message-security assist
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa, Lat30, GroupAlone], (instregex "KM(C|F|O|CTR)?$")>;
+def : InstRW<[FXa, Lat30, GroupAlone], (instregex "(KIMD|KLMD|KMAC|PCC|PPNO)$")>;
+
+//===----------------------------------------------------------------------===//
+// Decimal arithmetic
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, VecDF, LSU, Lat30, GroupAlone], (instregex "CVB(Y|G)?$")>;
+def : InstRW<[FXb, VecDF, FXb, Lat30, GroupAlone], (instregex "CVD(Y|G)?$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MV(N|Z|O)$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "(PACK|PKA|PKU)$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UNPK(A|U)?$")>;
+
+def : InstRW<[FXb, VecDFX, LSU, LSU, Lat9, GroupAlone],
+ (instregex "(A|S|ZA)P$")>;
+def : InstRW<[FXb, VecDFX2, LSU, LSU, Lat30, GroupAlone],
+ (instregex "(M|D)P$")>;
+def : InstRW<[FXb, FXb, VecDFX2, LSU, LSU, LSU, Lat15, GroupAlone],
+ (instregex "SRP$")>;
+def : InstRW<[VecDFX, LSU, LSU, Lat5, GroupAlone], (instregex "CP$")>;
+def : InstRW<[VecDFX, LSU, Lat4, GroupAlone], (instregex "TP$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "ED(MK)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Access registers
+//===----------------------------------------------------------------------===//
+
+// Extract/set/copy access register
+def : InstRW<[LSU], (instregex "(EAR|SAR|CPYA)$")>;
+
+// Load address extended
+def : InstRW<[LSU, FXa, Lat5, BeginGroup], (instregex "LAE(Y)?$")>;
+
+// Load/store access multiple (not modeled precisely)
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "(L|ST)AM(Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Program mask and addressing mode
+//===----------------------------------------------------------------------===//
+
+// Insert Program Mask
+def : InstRW<[FXa, Lat3, EndGroup], (instregex "IPM$")>;
+
+// Set Program Mask
+def : InstRW<[LSU, EndGroup], (instregex "SPM$")>;
+
+// Branch and link
+def : InstRW<[FXa, FXa, FXb, Lat5, GroupAlone], (instregex "BAL(R)?$")>;
+
+// Test addressing mode
+def : InstRW<[FXb], (instregex "TAM$")>;
+
+// Set addressing mode
+def : InstRW<[FXb, Lat2, EndGroup], (instregex "SAM(24|31|64)$")>;
+
+// Branch (and save) and set mode.
+def : InstRW<[FXa, FXb, Lat2, GroupAlone], (instregex "BSM$")>;
+def : InstRW<[FXa, FXa, FXb, Lat3, GroupAlone], (instregex "BASSM$")>;
+
+//===----------------------------------------------------------------------===//
+// Transactional execution
+//===----------------------------------------------------------------------===//
+
+// Transaction begin
+def : InstRW<[LSU, LSU, FXb, FXb, FXb, FXb, FXb, Lat15, GroupAlone],
+ (instregex "TBEGIN(C|_nofloat)?$")>;
+
+// Transaction end
+def : InstRW<[FXb, GroupAlone], (instregex "TEND$")>;
+
+// Transaction abort
+def : InstRW<[LSU, GroupAlone], (instregex "TABORT$")>;
+
+// Extract Transaction Nesting Depth
+def : InstRW<[FXa], (instregex "ETND$")>;
+
+// Nontransactional store
+def : InstRW<[FXb, LSU, Lat5], (instregex "NTSTG$")>;
+
+//===----------------------------------------------------------------------===//
+// Processor assist
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb], (instregex "PPA$")>;
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//===----------------------------------------------------------------------===//
+
+// Find leftmost one
+def : InstRW<[FXa, Lat6, GroupAlone], (instregex "FLOGR$")>;
+
+// Population count
+def : InstRW<[FXa, Lat3], (instregex "POPCNT$")>;
+
+// Extend
+def : InstRW<[FXa], (instregex "AEXT128_64$")>;
+def : InstRW<[FXa], (instregex "ZEXT128_(32|64)$")>;
+
+// String instructions
+def : InstRW<[FXa, LSU, Lat30], (instregex "SRST$")>;
+def : InstRW<[LSU, Lat30], (instregex "SRSTU$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CUSE$")>;
+
+// Various complex instructions
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CFC$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UPT$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CKSM$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CMPSC$")>;
+
+// Execute
+def : InstRW<[FXb, GroupAlone], (instregex "EX(RL)?$")>;
+
+//===----------------------------------------------------------------------===//
+// .insn directive instructions
+//===----------------------------------------------------------------------===//
+
+// An "empty" sched-class will be assigned instead of the "invalid sched-class".
+// getNumDecoderSlots() will then return 1 instead of 0.
+def : InstRW<[], (instregex "Insn.*")>;
+
+
+// ----------------------------- Floating point ----------------------------- //
+
+//===----------------------------------------------------------------------===//
+// FP: Select instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa], (instregex "SelectF(32|64|128)$")>;
+def : InstRW<[FXa], (instregex "CondStoreF32(Inv)?$")>;
+def : InstRW<[FXa], (instregex "CondStoreF64(Inv)?$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load zero
+def : InstRW<[FXb], (instregex "LZ(DR|ER)$")>;
+def : InstRW<[FXb, FXb, Lat2, BeginGroup], (instregex "LZXR$")>;
+
+// Load
+def : InstRW<[VecXsPm], (instregex "LER$")>;
+def : InstRW<[FXb], (instregex "LD(R|R32|GR)$")>;
+def : InstRW<[FXb, Lat3], (instregex "LGDR$")>;
+def : InstRW<[FXb, FXb, Lat2, GroupAlone], (instregex "LXR$")>;
+
+// Load and Test
+def : InstRW<[VecXsPm, Lat4], (instregex "LT(D|E)BR$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "LTEBRCompare(_VecPseudo)?$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "LTDBRCompare(_VecPseudo)?$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "LTXBR$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone],
+ (instregex "LTXBRCompare(_VecPseudo)?$")>;
+
+// Copy sign
+def : InstRW<[VecXsPm], (instregex "CPSDRd(d|s)$")>;
+def : InstRW<[VecXsPm], (instregex "CPSDRs(d|s)$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Load instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[VecXsPm, LSU, Lat7], (instregex "LE(Y)?$")>;
+def : InstRW<[LSU], (instregex "LD(Y|E32)?$")>;
+def : InstRW<[LSU], (instregex "LX$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Store instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, LSU, Lat7], (instregex "STD(Y)?$")>;
+def : InstRW<[FXb, LSU, Lat7], (instregex "STE(Y)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "STX$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[VecBF], (instregex "LEDBR(A)?$")>;
+def : InstRW<[VecDF, VecDF, Lat20], (instregex "LEXBR(A)?$")>;
+def : InstRW<[VecDF, VecDF, Lat20], (instregex "LDXBR(A)?$")>;
+
+// Load lengthened
+def : InstRW<[VecBF, LSU, Lat12], (instregex "LDEB$")>;
+def : InstRW<[VecBF], (instregex "LDEBR$")>;
+def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "LX(D|E)B$")>;
+def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "LX(D|E)BR$")>;
+
+// Convert from fixed / logical
+def : InstRW<[FXb, VecBF, Lat9, BeginGroup], (instregex "CE(F|G)BR(A)?$")>;
+def : InstRW<[FXb, VecBF, Lat9, BeginGroup], (instregex "CD(F|G)BR(A)?$")>;
+def : InstRW<[FXb, VecDF2, VecDF2, Lat12, GroupAlone], (instregex "CX(F|G)BR(A)?$")>;
+def : InstRW<[FXb, VecBF, Lat9, BeginGroup], (instregex "CEL(F|G)BR$")>;
+def : InstRW<[FXb, VecBF, Lat9, BeginGroup], (instregex "CDL(F|G)BR$")>;
+def : InstRW<[FXb, VecDF2, VecDF2, Lat12, GroupAlone], (instregex "CXL(F|G)BR$")>;
+
+// Convert to fixed / logical
+def : InstRW<[FXb, VecBF, Lat11, BeginGroup], (instregex "CF(E|D)BR(A)?$")>;
+def : InstRW<[FXb, VecBF, Lat11, BeginGroup], (instregex "CG(E|D)BR(A)?$")>;
+def : InstRW<[FXb, VecDF, VecDF, Lat20, BeginGroup], (instregex "C(F|G)XBR(A)?$")>;
+def : InstRW<[FXb, VecBF, Lat11, GroupAlone], (instregex "CLFEBR$")>;
+def : InstRW<[FXb, VecBF, Lat11, BeginGroup], (instregex "CLFDBR$")>;
+def : InstRW<[FXb, VecBF, Lat11, BeginGroup], (instregex "CLG(E|D)BR$")>;
+def : InstRW<[FXb, VecDF, VecDF, Lat20, BeginGroup], (instregex "CL(F|G)XBR$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load Complement / Negative / Positive
+def : InstRW<[VecXsPm, Lat4], (instregex "L(C|N|P)DBR$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "L(C|N|P)EBR$")>;
+def : InstRW<[FXb], (instregex "LCDFR(_32)?$")>;
+def : InstRW<[FXb], (instregex "LNDFR(_32)?$")>;
+def : InstRW<[FXb], (instregex "LPDFR(_32)?$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "L(C|N|P)XBR$")>;
+
+// Square root
+def : InstRW<[VecFPd, LSU], (instregex "SQ(E|D)B$")>;
+def : InstRW<[VecFPd], (instregex "SQ(E|D)BR$")>;
+def : InstRW<[VecFPd, VecFPd, GroupAlone], (instregex "SQXBR$")>;
+
+// Load FP integer
+def : InstRW<[VecBF], (instregex "FIEBR(A)?$")>;
+def : InstRW<[VecBF], (instregex "FIDBR(A)?$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "FIXBR(A)?$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[VecBF, LSU, Lat12], (instregex "A(E|D)B$")>;
+def : InstRW<[VecBF], (instregex "A(E|D)BR$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "AXBR$")>;
+
+// Subtraction
+def : InstRW<[VecBF, LSU, Lat12], (instregex "S(E|D)B$")>;
+def : InstRW<[VecBF], (instregex "S(E|D)BR$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "SXBR$")>;
+
+// Multiply
+def : InstRW<[VecBF, LSU, Lat12], (instregex "M(D|DE|EE)B$")>;
+def : InstRW<[VecBF], (instregex "M(D|DE|EE)BR$")>;
+def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MXDB$")>;
+def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MXDBR$")>;
+def : InstRW<[VecDF2, VecDF2, Lat20, GroupAlone], (instregex "MXBR$")>;
+
+// Multiply and add / subtract
+def : InstRW<[VecBF, LSU, Lat12, GroupAlone], (instregex "M(A|S)EB$")>;
+def : InstRW<[VecBF, GroupAlone], (instregex "M(A|S)EBR$")>;
+def : InstRW<[VecBF, LSU, Lat12, GroupAlone], (instregex "M(A|S)DB$")>;
+def : InstRW<[VecBF], (instregex "M(A|S)DBR$")>;
+
+// Division
+def : InstRW<[VecFPd, LSU], (instregex "D(E|D)B$")>;
+def : InstRW<[VecFPd], (instregex "D(E|D)BR$")>;
+def : InstRW<[VecFPd, VecFPd, GroupAlone], (instregex "DXBR$")>;
+
+// Divide to integer
+def : InstRW<[VecFPd, Lat30, GroupAlone], (instregex "DI(E|D)BR$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[VecXsPm, LSU, Lat8], (instregex "(K|C)(E|D)B$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "(K|C)(E|D)BR?$")>;
+def : InstRW<[VecDF, VecDF, Lat20, GroupAlone], (instregex "(K|C)XBR$")>;
+
+// Test Data Class
+def : InstRW<[LSU, VecXsPm, Lat9], (instregex "TC(E|D)B$")>;
+def : InstRW<[LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "TCXB$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Floating-point control register instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa, LSU, Lat4, GroupAlone], (instregex "EFPC$")>;
+def : InstRW<[FXb, LSU, Lat5, GroupAlone], (instregex "STFPC$")>;
+def : InstRW<[LSU, Lat3, GroupAlone], (instregex "SFPC$")>;
+def : InstRW<[LSU, LSU, Lat6, GroupAlone], (instregex "LFPC$")>;
+def : InstRW<[FXa, Lat30, GroupAlone], (instregex "SFASR$")>;
+def : InstRW<[FXa, LSU, Lat30, GroupAlone], (instregex "LFAS$")>;
+def : InstRW<[FXb, Lat3, GroupAlone], (instregex "SRNM(B|T)?$")>;
+
+
+// --------------------- Hexadecimal floating point ------------------------- //
+
+//===----------------------------------------------------------------------===//
+// HFP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load and Test
+def : InstRW<[VecXsPm, Lat4], (instregex "LT(D|E)R$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "LTXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[VecBF], (instregex "(LEDR|LRER)$")>;
+def : InstRW<[VecBF], (instregex "LEXR$")>;
+def : InstRW<[VecDF2, VecDF2], (instregex "(LDXR|LRDR)$")>;
+
+// Load lengthened
+def : InstRW<[LSU], (instregex "LDE$")>;
+def : InstRW<[FXb], (instregex "LDER$")>;
+def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "LX(D|E)$")>;
+def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "LX(D|E)R$")>;
+
+// Convert from fixed
+def : InstRW<[FXb, VecBF, Lat9, BeginGroup], (instregex "CE(F|G)R$")>;
+def : InstRW<[FXb, VecBF, Lat9, BeginGroup], (instregex "CD(F|G)R$")>;
+def : InstRW<[FXb, VecDF2, VecDF2, Lat12, GroupAlone], (instregex "CX(F|G)R$")>;
+
+// Convert to fixed
+def : InstRW<[FXb, VecBF, Lat11, BeginGroup], (instregex "CF(E|D)R$")>;
+def : InstRW<[FXb, VecBF, Lat11, BeginGroup], (instregex "CG(E|D)R$")>;
+def : InstRW<[FXb, VecDF, VecDF, Lat20, BeginGroup], (instregex "C(F|G)XR$")>;
+
+// Convert BFP to HFP / HFP to BFP.
+def : InstRW<[VecBF], (instregex "THD(E)?R$")>;
+def : InstRW<[VecBF], (instregex "TB(E)?DR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load Complement / Negative / Positive
+def : InstRW<[VecXsPm, Lat4], (instregex "L(C|N|P)DR$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "L(C|N|P)ER$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "L(C|N|P)XR$")>;
+
+// Halve
+def : InstRW<[VecBF], (instregex "H(E|D)R$")>;
+
+// Square root
+def : InstRW<[VecFPd, LSU], (instregex "SQ(E|D)$")>;
+def : InstRW<[VecFPd], (instregex "SQ(E|D)R$")>;
+def : InstRW<[VecFPd, VecFPd, GroupAlone], (instregex "SQXR$")>;
+
+// Load FP integer
+def : InstRW<[VecBF], (instregex "FIER$")>;
+def : InstRW<[VecBF], (instregex "FIDR$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "FIXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[VecBF, LSU, Lat12], (instregex "A(E|D|U|W)$")>;
+def : InstRW<[VecBF], (instregex "A(E|D|U|W)R$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "AXR$")>;
+
+// Subtraction
+def : InstRW<[VecBF, LSU, Lat12], (instregex "S(E|D|U|W)$")>;
+def : InstRW<[VecBF], (instregex "S(E|D|U|W)R$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "SXR$")>;
+
+// Multiply
+def : InstRW<[VecBF, LSU, Lat12], (instregex "M(D|DE|E|EE)$")>;
+def : InstRW<[VecBF], (instregex "M(D|DE|E|EE)R$")>;
+def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MXD$")>;
+def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MXDR$")>;
+def : InstRW<[VecDF2, VecDF2, Lat20, GroupAlone], (instregex "MXR$")>;
+def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MY(H|L)?$")>;
+def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MY(H|L)?R$")>;
+
+// Multiply and add / subtract
+def : InstRW<[VecBF, LSU, Lat12, GroupAlone], (instregex "M(A|S)E$")>;
+def : InstRW<[VecBF, GroupAlone], (instregex "M(A|S)ER$")>;
+def : InstRW<[VecBF, LSU, Lat12, GroupAlone], (instregex "M(A|S)D$")>;
+def : InstRW<[VecBF], (instregex "M(A|S)DR$")>;
+def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MAY(H|L)?$")>;
+def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MAY(H|L)?R$")>;
+
+// Division
+def : InstRW<[VecFPd, LSU], (instregex "D(E|D)$")>;
+def : InstRW<[VecFPd], (instregex "D(E|D)R$")>;
+def : InstRW<[VecFPd, VecFPd, GroupAlone], (instregex "DXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[VecXsPm, LSU, Lat8], (instregex "C(E|D)$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "C(E|D)R$")>;
+def : InstRW<[VecDF, VecDF, Lat20, GroupAlone], (instregex "CXR$")>;
+
+
+// ------------------------ Decimal floating point -------------------------- //
+
+//===----------------------------------------------------------------------===//
+// DFP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load and Test
+def : InstRW<[VecDF], (instregex "LTDTR$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "LTXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[VecDF, Lat15], (instregex "LEDTR$")>;
+def : InstRW<[VecDF, VecDF, Lat20], (instregex "LDXTR$")>;
+
+// Load lengthened
+def : InstRW<[VecDF], (instregex "LDETR$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "LXDTR$")>;
+
+// Convert from fixed / logical
+def : InstRW<[FXb, VecDF, Lat30, BeginGroup], (instregex "CD(F|G)TR(A)?$")>;
+def : InstRW<[FXb, VecDF2, VecDF2, Lat30, GroupAlone], (instregex "CX(F|G)TR(A)?$")>;
+def : InstRW<[FXb, VecDF, Lat30, BeginGroup], (instregex "CDL(F|G)TR$")>;
+def : InstRW<[FXb, VecDF2, VecDF2, Lat30, GroupAlone], (instregex "CXL(F|G)TR$")>;
+
+// Convert to fixed / logical
+def : InstRW<[FXb, VecDF, Lat30, BeginGroup], (instregex "C(F|G)DTR(A)?$")>;
+def : InstRW<[FXb, VecDF, VecDF, Lat30, BeginGroup], (instregex "C(F|G)XTR(A)?$")>;
+def : InstRW<[FXb, VecDF, Lat30, BeginGroup], (instregex "CL(F|G)DTR$")>;
+def : InstRW<[FXb, VecDF, VecDF, Lat30, BeginGroup], (instregex "CL(F|G)XTR$")>;
+
+// Convert from / to signed / unsigned packed
+def : InstRW<[FXb, VecDF, Lat9, BeginGroup], (instregex "CD(S|U)TR$")>;
+def : InstRW<[FXb, FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "CX(S|U)TR$")>;
+def : InstRW<[FXb, VecDF, Lat12, BeginGroup], (instregex "C(S|U)DTR$")>;
+def : InstRW<[FXb, FXb, VecDF2, VecDF2, Lat15, BeginGroup], (instregex "C(S|U)XTR$")>;
+
+// Convert from / to zoned
+def : InstRW<[LSU, VecDF, Lat11, BeginGroup], (instregex "CDZT$")>;
+def : InstRW<[LSU, LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "CXZT$")>;
+def : InstRW<[FXb, LSU, VecDF, Lat11, BeginGroup], (instregex "CZDT$")>;
+def : InstRW<[FXb, LSU, VecDF, VecDF, Lat15, GroupAlone], (instregex "CZXT$")>;
+
+// Convert from / to packed
+def : InstRW<[LSU, VecDF, Lat11, BeginGroup], (instregex "CDPT$")>;
+def : InstRW<[LSU, LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "CXPT$")>;
+def : InstRW<[FXb, LSU, VecDF, Lat11, BeginGroup], (instregex "CPDT$")>;
+def : InstRW<[FXb, LSU, VecDF, VecDF, Lat15, GroupAlone], (instregex "CPXT$")>;
+
+// Perform floating-point operation
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "PFPO$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load FP integer
+def : InstRW<[VecDF], (instregex "FIDTR$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "FIXTR$")>;
+
+// Extract biased exponent
+def : InstRW<[FXb, VecDF, Lat12, BeginGroup], (instregex "EEDTR$")>;
+def : InstRW<[FXb, VecDF, Lat12, BeginGroup], (instregex "EEXTR$")>;
+
+// Extract significance
+def : InstRW<[FXb, VecDF, Lat12, BeginGroup], (instregex "ESDTR$")>;
+def : InstRW<[FXb, VecDF, VecDF, Lat15, BeginGroup], (instregex "ESXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[VecDF], (instregex "ADTR(A)?$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "AXTR(A)?$")>;
+
+// Subtraction
+def : InstRW<[VecDF], (instregex "SDTR(A)?$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "SXTR(A)?$")>;
+
+// Multiply
+def : InstRW<[VecDF, Lat30], (instregex "MDTR(A)?$")>;
+def : InstRW<[VecDF2, VecDF2, Lat30, GroupAlone], (instregex "MXTR(A)?$")>;
+
+// Division
+def : InstRW<[VecDF, Lat30], (instregex "DDTR(A)?$")>;
+def : InstRW<[VecDF2, VecDF2, Lat30, GroupAlone], (instregex "DXTR(A)?$")>;
+
+// Quantize
+def : InstRW<[VecDF], (instregex "QADTR$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "QAXTR$")>;
+
+// Reround
+def : InstRW<[FXb, VecDF, Lat11], (instregex "RRDTR$")>;
+def : InstRW<[FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "RRXTR$")>;
+
+// Shift significand left/right
+def : InstRW<[LSU, VecDF, Lat11], (instregex "S(L|R)DT$")>;
+def : InstRW<[LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "S(L|R)XT$")>;
+
+// Insert biased exponent
+def : InstRW<[FXb, VecDF, Lat11], (instregex "IEDTR$")>;
+def : InstRW<[FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "IEXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[VecDF], (instregex "(K|C)DTR$")>;
+def : InstRW<[VecDF, VecDF, Lat11, GroupAlone], (instregex "(K|C)XTR$")>;
+
+// Compare biased exponent
+def : InstRW<[VecDF], (instregex "CEDTR$")>;
+def : InstRW<[VecDF], (instregex "CEXTR$")>;
+
+// Test Data Class/Group
+def : InstRW<[LSU, VecDF, Lat11], (instregex "TD(C|G)(E|D)T$")>;
+def : InstRW<[LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "TD(C|G)XT$")>;
+
+
+// --------------------------------- Vector --------------------------------- //
+
+//===----------------------------------------------------------------------===//
+// Vector: Move instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb], (instregex "VLR(32|64)?$")>;
+def : InstRW<[FXb, Lat4], (instregex "VLGV(B|F|G|H)?$")>;
+def : InstRW<[FXb], (instregex "VLVG(B|F|G|H)?$")>;
+def : InstRW<[FXb, Lat2], (instregex "VLVGP(32)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Immediate instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[VecXsPm], (instregex "VZERO$")>;
+def : InstRW<[VecXsPm], (instregex "VONE$")>;
+def : InstRW<[VecXsPm], (instregex "VGBM$")>;
+def : InstRW<[VecXsPm], (instregex "VGM(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VREPI(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VLEI(B|F|G|H)$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Loads
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSU], (instregex "VL(L|BB)?$")>;
+def : InstRW<[LSU], (instregex "VL(32|64)$")>;
+def : InstRW<[LSU], (instregex "VLLEZ(B|F|G|H)?$")>;
+def : InstRW<[LSU], (instregex "VLREP(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm, LSU, Lat7], (instregex "VLE(B|F|G|H)$")>;
+def : InstRW<[FXb, LSU, VecXsPm, Lat11, BeginGroup], (instregex "VGE(F|G)$")>;
+def : InstRW<[LSU, LSU, LSU, LSU, LSU, Lat10, GroupAlone],
+ (instregex "VLM$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Stores
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, LSU, Lat8], (instregex "VST(L|32|64)?$")>;
+def : InstRW<[FXb, LSU, Lat8], (instregex "VSTE(F|G)$")>;
+def : InstRW<[FXb, LSU, VecXsPm, Lat11, BeginGroup], (instregex "VSTE(B|H)$")>;
+def : InstRW<[LSU, LSU, FXb, FXb, FXb, FXb, FXb, Lat20, GroupAlone],
+ (instregex "VSTM$")>;
+def : InstRW<[FXb, FXb, LSU, Lat12, BeginGroup], (instregex "VSCE(F|G)$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Selects and permutes
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[VecXsPm], (instregex "VMRH(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VMRL(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VPERM$")>;
+def : InstRW<[VecXsPm], (instregex "VPDI$")>;
+def : InstRW<[VecXsPm], (instregex "VREP(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VSEL$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Widening and narrowing
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[VecXsPm], (instregex "VPK(F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VPKS(F|G|H)?$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "VPKS(F|G|H)S$")>;
+def : InstRW<[VecXsPm], (instregex "VPKLS(F|G|H)?$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "VPKLS(F|G|H)S$")>;
+def : InstRW<[VecXsPm], (instregex "VSEG(B|F|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VUPH(B|F|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VUPL(B|F)?$")>;
+def : InstRW<[VecXsPm], (instregex "VUPLH(B|F|H|W)?$")>;
+def : InstRW<[VecXsPm], (instregex "VUPLL(B|F|H)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Integer arithmetic
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[VecXsPm], (instregex "VA(B|F|G|H|Q|C|CQ)?$")>;
+def : InstRW<[VecXsPm], (instregex "VACC(B|F|G|H|Q|C|CQ)?$")>;
+def : InstRW<[VecXsPm], (instregex "VAVG(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VAVGL(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VN(C|O)?$")>;
+def : InstRW<[VecXsPm], (instregex "VO$")>;
+def : InstRW<[VecMul], (instregex "VCKSM$")>;
+def : InstRW<[VecXsPm], (instregex "VCLZ(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VCTZ(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VX$")>;
+def : InstRW<[VecMul], (instregex "VGFM?$")>;
+def : InstRW<[VecMul], (instregex "VGFMA(B|F|G|H)?$")>;
+def : InstRW<[VecMul], (instregex "VGFM(B|F|G|H)$")>;
+def : InstRW<[VecXsPm], (instregex "VLC(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VLP(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VMX(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VMXL(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VMN(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VMNL(B|F|G|H)?$")>;
+def : InstRW<[VecMul], (instregex "VMAL(B|F)?$")>;
+def : InstRW<[VecMul], (instregex "VMALE(B|F|H)?$")>;
+def : InstRW<[VecMul], (instregex "VMALH(B|F|H|W)?$")>;
+def : InstRW<[VecMul], (instregex "VMALO(B|F|H)?$")>;
+def : InstRW<[VecMul], (instregex "VMAO(B|F|H)?$")>;
+def : InstRW<[VecMul], (instregex "VMAE(B|F|H)?$")>;
+def : InstRW<[VecMul], (instregex "VMAH(B|F|H)?$")>;
+def : InstRW<[VecMul], (instregex "VME(B|F|H)?$")>;
+def : InstRW<[VecMul], (instregex "VMH(B|F|H)?$")>;
+def : InstRW<[VecMul], (instregex "VML(B|F)?$")>;
+def : InstRW<[VecMul], (instregex "VMLE(B|F|H)?$")>;
+def : InstRW<[VecMul], (instregex "VMLH(B|F|H|W)?$")>;
+def : InstRW<[VecMul], (instregex "VMLO(B|F|H)?$")>;
+def : InstRW<[VecMul], (instregex "VMO(B|F|H)?$")>;
+
+def : InstRW<[VecXsPm], (instregex "VPOPCT$")>;
+
+def : InstRW<[VecXsPm], (instregex "VERLL(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VERLLV(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VERIM(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VESL(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VESLV(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VESRA(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VESRAV(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VESRL(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VESRLV(B|F|G|H)?$")>;
+
+def : InstRW<[VecXsPm], (instregex "VSL(DB)?$")>;
+def : InstRW<[VecXsPm, VecXsPm, Lat8], (instregex "VSLB$")>;
+def : InstRW<[VecXsPm], (instregex "VSR(A|L)$")>;
+def : InstRW<[VecXsPm, VecXsPm, Lat8], (instregex "VSR(A|L)B$")>;
+
+def : InstRW<[VecXsPm], (instregex "VSB(I|IQ|CBI|CBIQ)?$")>;
+def : InstRW<[VecXsPm], (instregex "VSCBI(B|F|G|H|Q)?$")>;
+def : InstRW<[VecXsPm], (instregex "VS(F|G|H|Q)?$")>;
+
+def : InstRW<[VecMul], (instregex "VSUM(B|H)?$")>;
+def : InstRW<[VecMul], (instregex "VSUMG(F|H)?$")>;
+def : InstRW<[VecMul], (instregex "VSUMQ(F|G)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Integer comparison
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[VecXsPm, Lat4], (instregex "VEC(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "VECL(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VCEQ(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "VCEQ(B|F|G|H)S$")>;
+def : InstRW<[VecXsPm], (instregex "VCH(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "VCH(B|F|G|H)S$")>;
+def : InstRW<[VecXsPm], (instregex "VCHL(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "VCHL(B|F|G|H)S$")>;
+def : InstRW<[VecStr, Lat5], (instregex "VTM$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Floating-point arithmetic
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[VecBF2], (instregex "VCD(G|GB|LG|LGB)$")>;
+def : InstRW<[VecBF], (instregex "WCD(GB|LGB)$")>;
+def : InstRW<[VecBF2], (instregex "VC(L)?GD$")>;
+def : InstRW<[VecBF2], (instregex "VFADB$")>;
+def : InstRW<[VecBF], (instregex "WFADB$")>;
+def : InstRW<[VecBF2], (instregex "VCGDB$")>;
+def : InstRW<[VecBF], (instregex "WCGDB$")>;
+def : InstRW<[VecBF2], (instregex "VF(I|M|A|S)$")>;
+def : InstRW<[VecBF2], (instregex "VF(I|M|S)DB$")>;
+def : InstRW<[VecBF], (instregex "WF(I|M|S)DB$")>;
+def : InstRW<[VecBF2], (instregex "VCLGDB$")>;
+def : InstRW<[VecBF], (instregex "WCLGDB$")>;
+def : InstRW<[VecXsPm], (instregex "VFL(C|N|P)DB$")>;
+def : InstRW<[VecXsPm], (instregex "WFL(C|N|P)DB$")>;
+def : InstRW<[VecBF2], (instregex "VFM(A|S)$")>;
+def : InstRW<[VecBF2], (instregex "VFM(A|S)DB$")>;
+def : InstRW<[VecBF], (instregex "WFM(A|S)DB$")>;
+def : InstRW<[VecXsPm], (instregex "VFPSO$")>;
+def : InstRW<[VecXsPm], (instregex "(V|W)FPSODB$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "VFTCI(DB)?$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "WFTCIDB$")>;
+def : InstRW<[VecBF2], (instregex "VL(DE|ED)$")>;
+def : InstRW<[VecBF2], (instregex "VL(DE|ED)B$")>;
+def : InstRW<[VecBF], (instregex "WL(DE|ED)B$")>;
+
+// Divide / square root
+def : InstRW<[VecFPd], (instregex "VFD$")>;
+def : InstRW<[VecFPd], (instregex "(V|W)FDDB$")>;
+def : InstRW<[VecFPd], (instregex "VFSQ$")>;
+def : InstRW<[VecFPd], (instregex "(V|W)FSQDB$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Floating-point comparison
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[VecXsPm], (instregex "VFC(E|H|HE)$")>;
+def : InstRW<[VecXsPm], (instregex "VFC(E|H|HE)DB$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)$")>;
+def : InstRW<[VecXsPm], (instregex "WFC(E|H|HE)DB$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "VFC(E|H|HE)DBS$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "WFC(E|H|HE)DBS$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)DB$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Floating-point insertion and extraction
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb], (instregex "LEFR$")>;
+def : InstRW<[FXb, Lat4], (instregex "LFER$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: String instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[VecStr], (instregex "VFAE(B)?$")>;
+def : InstRW<[VecStr, Lat5], (instregex "VFAEBS$")>;
+def : InstRW<[VecStr], (instregex "VFAE(F|H)$")>;
+def : InstRW<[VecStr, Lat5], (instregex "VFAE(F|H)S$")>;
+def : InstRW<[VecStr], (instregex "VFAEZ(B|F|H)$")>;
+def : InstRW<[VecStr, Lat5], (instregex "VFAEZ(B|F|H)S$")>;
+def : InstRW<[VecStr], (instregex "VFEE(B|F|H|ZB|ZF|ZH)?$")>;
+def : InstRW<[VecStr, Lat5], (instregex "VFEE(B|F|H|ZB|ZF|ZH)S$")>;
+def : InstRW<[VecStr], (instregex "VFENE(B|F|H|ZB|ZF|ZH)?$")>;
+def : InstRW<[VecStr, Lat5], (instregex "VFENE(B|F|H|ZB|ZF|ZH)S$")>;
+def : InstRW<[VecStr], (instregex "VISTR(B|F|H)?$")>;
+def : InstRW<[VecStr, Lat5], (instregex "VISTR(B|F|H)S$")>;
+def : InstRW<[VecStr], (instregex "VSTRC(B|F|H)?$")>;
+def : InstRW<[VecStr, Lat5], (instregex "VSTRC(B|F|H)S$")>;
+def : InstRW<[VecStr], (instregex "VSTRCZ(B|F|H)$")>;
+def : InstRW<[VecStr, Lat5], (instregex "VSTRCZ(B|F|H)S$")>;
+
+
+// -------------------------------- System ---------------------------------- //
+
+//===----------------------------------------------------------------------===//
+// System: Program-Status Word Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, Lat30], (instregex "EPSW$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "LPSW(E)?$")>;
+def : InstRW<[FXa, Lat3], (instregex "IPK$")>;
+def : InstRW<[LSU], (instregex "SPKA$")>;
+def : InstRW<[LSU], (instregex "SSM$")>;
+def : InstRW<[FXb], (instregex "ST(N|O)SM$")>;
+def : InstRW<[FXa, Lat3], (instregex "IAC$")>;
+def : InstRW<[LSU], (instregex "SAC(F)?$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Control Register Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, LSU, Lat30], (instregex "LCTL(G)?$")>;
+def : InstRW<[LSU, Lat30], (instregex "STCT(L|G)$")>;
+def : InstRW<[LSU], (instregex "E(P|S)A(I)?R$")>;
+def : InstRW<[FXb, Lat30], (instregex "SSA(I)?R$")>;
+def : InstRW<[FXb, Lat30], (instregex "ESEA$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Prefix-Register Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, LSU, Lat30], (instregex "SPX$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "STPX$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Storage-Key and Real Memory Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, Lat30], (instregex "ISKE$")>;
+def : InstRW<[FXb, Lat30], (instregex "IVSK$")>;
+def : InstRW<[FXb, Lat30], (instregex "SSKE(Opt)?$")>;
+def : InstRW<[FXb, Lat30], (instregex "RRB(E|M)$")>;
+def : InstRW<[FXb, Lat30], (instregex "PFMF$")>;
+def : InstRW<[FXb, Lat30], (instregex "TB$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "PGIN$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "PGOUT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Dynamic-Address-Translation Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, LSU, Lat30], (instregex "IPTE(Opt)?(Opt)?$")>;
+def : InstRW<[FXb, Lat30], (instregex "IDTE(Opt)?$")>;
+def : InstRW<[FXb, Lat30], (instregex "CRDTE(Opt)?$")>;
+def : InstRW<[FXb, Lat30], (instregex "PTLB$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "CSP(G)?$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "LPTEA$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "LRA(Y|G)?$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "STRAG$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "LURA(G)?$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "STUR(A|G)$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "TPROT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Memory-move Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa, FXa, FXb, LSU, Lat8, GroupAlone], (instregex "MVC(K|P|S)$")>;
+def : InstRW<[FXa, LSU, Lat6, GroupAlone], (instregex "MVC(S|D)K$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "MVCOS$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "MVPG$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Address-Space Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, LSU, Lat30], (instregex "LASP$")>;
+def : InstRW<[LSU], (instregex "PALB$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "PC$")>;
+def : InstRW<[FXb, Lat30], (instregex "PR$")>;
+def : InstRW<[FXb, Lat30], (instregex "PT(I)?$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "RP$")>;
+def : InstRW<[FXb, Lat30], (instregex "BS(G|A)$")>;
+def : InstRW<[FXb, Lat20], (instregex "TAR$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Linkage-Stack Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, Lat30], (instregex "BAKR$")>;
+def : InstRW<[FXb, Lat30], (instregex "EREG(G)?$")>;
+def : InstRW<[FXb, Lat30], (instregex "(E|M)STA$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Time-Related Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, Lat30], (instregex "PTFF$")>;
+def : InstRW<[FXb, LSU, Lat20], (instregex "SCK$")>;
+def : InstRW<[FXb, Lat30], (instregex "SCKPF$")>;
+def : InstRW<[FXb, LSU, Lat20], (instregex "SCKC$")>;
+def : InstRW<[LSU, GroupAlone], (instregex "SPT$")>;
+def : InstRW<[LSU, LSU, LSU, FXa, FXa, FXb, Lat9, GroupAlone],
+ (instregex "STCK(F)?$")>;
+def : InstRW<[LSU, LSU, LSU, LSU, FXa, FXa, FXb, FXb, Lat11, GroupAlone],
+ (instregex "STCKE$")>;
+def : InstRW<[FXb, LSU, Lat9], (instregex "STCKC$")>;
+def : InstRW<[LSU, LSU, FXb, Lat3], (instregex "STPT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: CPU-Related Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, LSU, Lat30], (instregex "STAP$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "STIDP$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "STSI$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "STFL(E)?$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "ECAG$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "ECTG$")>;
+def : InstRW<[FXb, Lat30], (instregex "PTF$")>;
+def : InstRW<[FXb, Lat30], (instregex "PCKMO$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Miscellaneous Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, Lat30], (instregex "SVC$")>;
+def : InstRW<[FXb], (instregex "MC$")>;
+def : InstRW<[FXb, Lat30], (instregex "DIAG$")>;
+def : InstRW<[FXb], (instregex "TRAC(E|G)$")>;
+def : InstRW<[FXb, Lat30], (instregex "TRAP(2|4)$")>;
+def : InstRW<[FXb, Lat30], (instregex "SIGP$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "SIGA$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "SIE$")>;
+
+//===----------------------------------------------------------------------===//
+// System: CPU-Measurement Facility Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb], (instregex "LPP$")>;
+def : InstRW<[FXb, Lat30], (instregex "ECPGA$")>;
+def : InstRW<[FXb, Lat30], (instregex "E(C|P)CTR$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "L(C|P|S)CTL$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "Q(S|CTR)I$")>;
+def : InstRW<[FXb, Lat30], (instregex "S(C|P)CTR$")>;
+
+//===----------------------------------------------------------------------===//
+// System: I/O Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, Lat30], (instregex "(C|H|R|X)SCH$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "(M|S|ST|T)SCH$")>;
+def : InstRW<[FXb, Lat30], (instregex "RCHP$")>;
+def : InstRW<[FXb, Lat30], (instregex "SCHM$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "STC(PS|RW)$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "TPI$")>;
+def : InstRW<[FXb, Lat30], (instregex "SAL$")>;
+
+}
+
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
new file mode 100644
index 000000000000..128049a09086
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
@@ -0,0 +1,1196 @@
+//=- SystemZScheduleZ196.td - SystemZ Scheduling Definitions ---*- tblgen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for Z196 to support instruction
+// scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+def Z196Model : SchedMachineModel {
+
+ let UnsupportedFeatures = Arch9UnsupportedFeatures.List;
+
+ let IssueWidth = 5;
+ let MicroOpBufferSize = 40; // Issue queues
+ let LoadLatency = 1; // Optimistic load latency.
+
+ let PostRAScheduler = 1;
+
+ // Extra cycles for a mispredicted branch.
+ let MispredictPenalty = 16;
+}
+
+let SchedModel = Z196Model in {
+
+// These definitions could be put in a common subtarget include file,
+// but the include system in TableGen currently seems to reject
+// multiple includes of the same file.
+def : WriteRes<GroupAlone, []> {
+ let NumMicroOps = 0;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
+def : WriteRes<EndGroup, []> {
+ let NumMicroOps = 0;
+ let EndGroup = 1;
+}
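+
+// Latency-only classes: each Lat<N> write below adds N cycles of latency
+// to a scheduling class without consuming any decoder slots
+// (NumMicroOps = 0).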
+def : WriteRes<Lat2, []> { let Latency = 2; let NumMicroOps = 0;}
+def : WriteRes<Lat3, []> { let Latency = 3; let NumMicroOps = 0;}
+def : WriteRes<Lat4, []> { let Latency = 4; let NumMicroOps = 0;}
+def : WriteRes<Lat5, []> { let Latency = 5; let NumMicroOps = 0;}
+def : WriteRes<Lat6, []> { let Latency = 6; let NumMicroOps = 0;}
+def : WriteRes<Lat7, []> { let Latency = 7; let NumMicroOps = 0;}
+def : WriteRes<Lat8, []> { let Latency = 8; let NumMicroOps = 0;}
+def : WriteRes<Lat9, []> { let Latency = 9; let NumMicroOps = 0;}
+def : WriteRes<Lat10, []> { let Latency = 10; let NumMicroOps = 0;}
+def : WriteRes<Lat11, []> { let Latency = 11; let NumMicroOps = 0;}
+def : WriteRes<Lat12, []> { let Latency = 12; let NumMicroOps = 0;}
+def : WriteRes<Lat15, []> { let Latency = 15; let NumMicroOps = 0;}
+def : WriteRes<Lat20, []> { let Latency = 20; let NumMicroOps = 0;}
+def : WriteRes<Lat30, []> { let Latency = 30; let NumMicroOps = 0;}
+
+// Execution units.
+def Z196_FXUnit : ProcResource<2>; // Two fixed-point units.
+def Z196_LSUnit : ProcResource<2>; // Two load/store units.
+def Z196_FPUnit : ProcResource<1>; // One floating-point unit (BFP/HFP).
+def Z196_DFUnit : ProcResource<1>; // One decimal floating-point unit.
+
+// Subtarget-specific definitions of scheduling resources.
+def : WriteRes<FXU, [Z196_FXUnit]> { let Latency = 1; }
+def : WriteRes<LSU, [Z196_LSUnit]> { let Latency = 4; }
+def : WriteRes<LSU_lat1, [Z196_LSUnit]> { let Latency = 1; }
+def : WriteRes<FPU, [Z196_FPUnit]> { let Latency = 8; }
+def : WriteRes<FPU2, [Z196_FPUnit, Z196_FPUnit]> { let Latency = 9; }
+def : WriteRes<DFU, [Z196_DFUnit]> { let Latency = 2; }
+def : WriteRes<DFU2, [Z196_DFUnit, Z196_DFUnit]> { let Latency = 3; }
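+
+// An instruction that lists several of these writes reserves each of the
+// named units; the generic machine model reports the instruction's latency
+// as the maximum latency among its writes.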
+
+// -------------------------- INSTRUCTIONS ---------------------------------- //
+
+// InstRW constructs are used to preserve the readability of the
+// InstrInfo files.
+
+// For each instruction, as matched by a regexp, provide a list of
+// resources that it needs. These will be combined into a SchedClass.
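+//
+// For example (purely illustrative; "FOO" is not a real instruction):
+//
+//   def : InstRW<[FXU, LSU, Lat5], (instregex "FOO(R)?$")>;
+//
+// would match FOO and FOOR, reserve one fixed-point unit and one
+// load/store unit for each, and add a latency-only write of 5 cycles.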
+
+//===----------------------------------------------------------------------===//
+// Stack allocation
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU], (instregex "ADJDYNALLOC$")>; // Pseudo -> LA / LAY
+
+//===----------------------------------------------------------------------===//
+// Branch instructions
+//===----------------------------------------------------------------------===//
+
+// Branch
+def : InstRW<[LSU, EndGroup], (instregex "(Call)?BRC(L)?(Asm.*)?$")>;
+def : InstRW<[LSU, EndGroup], (instregex "(Call)?J(G)?(Asm.*)?$")>;
+def : InstRW<[LSU, EndGroup], (instregex "(Call)?BC(R)?(Asm.*)?$")>;
+def : InstRW<[LSU, EndGroup], (instregex "(Call)?B(R)?(Asm.*)?$")>;
+def : InstRW<[FXU, LSU, Lat5, GroupAlone], (instregex "BRCT(G|H)?$")>;
+def : InstRW<[FXU, LSU, Lat5, GroupAlone], (instregex "BCT(G)?(R)?$")>;
+def : InstRW<[FXU, FXU, FXU, LSU, Lat7, GroupAlone],
+ (instregex "B(R)?X(H|L).*$")>;
+
+// Compare and branch
+def : InstRW<[FXU, LSU, Lat5, GroupAlone],
+ (instregex "C(L)?(G)?(I|R)J(Asm.*)?$")>;
+def : InstRW<[FXU, LSU, Lat5, GroupAlone],
+ (instregex "C(L)?(G)?(I|R)B(Call|Return|Asm.*)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Trap instructions
+//===----------------------------------------------------------------------===//
+
+// Trap
+def : InstRW<[LSU, EndGroup], (instregex "(Cond)?Trap$")>;
+
+// Compare and trap
+def : InstRW<[FXU], (instregex "C(G)?(I|R)T(Asm.*)?$")>;
+def : InstRW<[FXU], (instregex "CL(G)?RT(Asm.*)?$")>;
+def : InstRW<[FXU], (instregex "CL(F|G)IT(Asm.*)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Call and return instructions
+//===----------------------------------------------------------------------===//
+
+// Call
+def : InstRW<[LSU, FXU, FXU, Lat6, GroupAlone], (instregex "(Call)?BRAS$")>;
+def : InstRW<[LSU, FXU, FXU, Lat6, GroupAlone], (instregex "(Call)?BRASL$")>;
+def : InstRW<[FXU, FXU, LSU, Lat6, GroupAlone], (instregex "(Call)?BAS(R)?$")>;
+def : InstRW<[LSU, FXU, FXU, Lat6, GroupAlone], (instregex "TLS_(G|L)DCALL$")>;
+
+// Return
+def : InstRW<[LSU_lat1, EndGroup], (instregex "Return$")>;
+def : InstRW<[LSU_lat1, EndGroup], (instregex "CondReturn$")>;
+
+//===----------------------------------------------------------------------===//
+// Select instructions
+//===----------------------------------------------------------------------===//
+
+// Select pseudo
+def : InstRW<[FXU], (instregex "Select(32|64|32Mux)$")>;
+
+// CondStore pseudos
+def : InstRW<[FXU], (instregex "CondStore16(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStore16Mux(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStore32(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStore64(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStore8(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStore8Mux(Inv)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Move instructions
+//===----------------------------------------------------------------------===//
+
+// Moves
+def : InstRW<[FXU, LSU, Lat5], (instregex "MV(G|H)?HI$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "MVI(Y)?$")>;
+
+// Move character
+def : InstRW<[LSU, LSU, LSU, FXU, Lat8, GroupAlone], (instregex "MVC$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVCL(E|U)?$")>;
+
+// Pseudo -> reg move
+def : InstRW<[FXU], (instregex "COPY(_TO_REGCLASS)?$")>;
+def : InstRW<[FXU], (instregex "EXTRACT_SUBREG$")>;
+def : InstRW<[FXU], (instregex "INSERT_SUBREG$")>;
+def : InstRW<[FXU], (instregex "REG_SEQUENCE$")>;
+def : InstRW<[FXU], (instregex "SUBREG_TO_REG$")>;
+
+// Loads
+def : InstRW<[LSU], (instregex "L(Y|FH|RL|Mux)?$")>;
+def : InstRW<[LSU], (instregex "LG(RL)?$")>;
+def : InstRW<[LSU], (instregex "L128$")>;
+
+def : InstRW<[FXU], (instregex "LLIH(F|H|L)$")>;
+def : InstRW<[FXU], (instregex "LLIL(F|H|L)$")>;
+
+def : InstRW<[FXU], (instregex "LG(F|H)I$")>;
+def : InstRW<[FXU], (instregex "LHI(Mux)?$")>;
+def : InstRW<[FXU], (instregex "LR(Mux)?$")>;
+
+// Load and test
+def : InstRW<[FXU, LSU, Lat5], (instregex "LT(G)?$")>;
+def : InstRW<[FXU], (instregex "LT(G)?R$")>;
+
+// Stores
+def : InstRW<[FXU, LSU, Lat5], (instregex "STG(RL)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "ST128$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "ST(Y|FH|RL|Mux)?$")>;
+
+// String moves.
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVST$")>;
+
+//===----------------------------------------------------------------------===//
+// Conditional move instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat2, EndGroup], (instregex "LOC(G)?R(Asm.*)?$")>;
+def : InstRW<[FXU, LSU, Lat6, EndGroup], (instregex "LOC(G)?(Asm.*)?$")>;
+def : InstRW<[FXU, LSU, Lat5, EndGroup], (instregex "STOC(G)?(Asm.*)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Sign extensions
+//===----------------------------------------------------------------------===//
+def : InstRW<[FXU], (instregex "L(B|H|G)R$")>;
+def : InstRW<[FXU], (instregex "LG(B|H|F)R$")>;
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "LTGF$")>;
+def : InstRW<[FXU], (instregex "LTGFR$")>;
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "LB(H|Mux)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LH(Y)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LH(H|Mux|RL)$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LG(B|H|F)$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LG(H|F)RL$")>;
+
+//===----------------------------------------------------------------------===//
+// Zero extensions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU], (instregex "LLCR(Mux)?$")>;
+def : InstRW<[FXU], (instregex "LLHR(Mux)?$")>;
+def : InstRW<[FXU], (instregex "LLG(C|F|H|T)R$")>;
+def : InstRW<[LSU], (instregex "LLC(Mux)?$")>;
+def : InstRW<[LSU], (instregex "LLH(Mux)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LL(C|H)H$")>;
+def : InstRW<[LSU], (instregex "LLHRL$")>;
+def : InstRW<[LSU], (instregex "LLG(C|F|H|T|FRL|HRL)$")>;
+
+//===----------------------------------------------------------------------===//
+// Truncations
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "STC(H|Y|Mux)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "STH(H|Y|RL|Mux)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "STCM(H|Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Multi-register moves
+//===----------------------------------------------------------------------===//
+
+// Load multiple (estimated average of 5 ops)
+def : InstRW<[LSU, LSU, LSU, LSU, LSU, Lat10, GroupAlone],
+ (instregex "LM(H|Y|G)?$")>;
+
+// Load multiple disjoint
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "LMD$")>;
+
+// Store multiple (estimated average of 3 ops)
+def : InstRW<[LSU, LSU, FXU, FXU, FXU, Lat10, GroupAlone],
+ (instregex "STM(H|Y|G)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Byte swaps
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU], (instregex "LRV(G)?R$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LRV(G|H)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "STRV(G|H)?$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVCIN$")>;
+
+//===----------------------------------------------------------------------===//
+// Load address instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU], (instregex "LA(Y|RL)?$")>;
+
+// Load the Global Offset Table address
+def : InstRW<[FXU], (instregex "GOT$")>;
+
+//===----------------------------------------------------------------------===//
+// Absolute Value and Negation
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat2], (instregex "LP(G)?R$")>;
+def : InstRW<[FXU, FXU, Lat3, GroupAlone], (instregex "L(N|P)GFR$")>;
+def : InstRW<[FXU, Lat2], (instregex "LN(R|GR)$")>;
+def : InstRW<[FXU], (instregex "LC(R|GR)$")>;
+def : InstRW<[FXU, FXU, Lat2, GroupAlone], (instregex "LCGFR$")>;
+
+//===----------------------------------------------------------------------===//
+// Insertion
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "IC(Y)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "IC32(Y)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "ICM(H|Y)?$")>;
+def : InstRW<[FXU], (instregex "II(F|H|L)Mux$")>;
+def : InstRW<[FXU], (instregex "IIHF(64)?$")>;
+def : InstRW<[FXU], (instregex "IIHH(64)?$")>;
+def : InstRW<[FXU], (instregex "IIHL(64)?$")>;
+def : InstRW<[FXU], (instregex "IILF(64)?$")>;
+def : InstRW<[FXU], (instregex "IILH(64)?$")>;
+def : InstRW<[FXU], (instregex "IILL(64)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Addition
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "A(L)?(Y|SI)?$")>;
+def : InstRW<[FXU, FXU, LSU, Lat6, GroupAlone], (instregex "AH(Y)?$")>;
+def : InstRW<[FXU], (instregex "AIH$")>;
+def : InstRW<[FXU], (instregex "AFI(Mux)?$")>;
+def : InstRW<[FXU], (instregex "AGFI$")>;
+def : InstRW<[FXU], (instregex "AGHI(K)?$")>;
+def : InstRW<[FXU], (instregex "AGR(K)?$")>;
+def : InstRW<[FXU], (instregex "AHI(K)?$")>;
+def : InstRW<[FXU], (instregex "AHIMux(K)?$")>;
+def : InstRW<[FXU], (instregex "AL(FI|HSIK)$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "ALGF$")>;
+def : InstRW<[FXU], (instregex "ALGHSIK$")>;
+def : InstRW<[FXU], (instregex "ALGF(I|R)$")>;
+def : InstRW<[FXU], (instregex "ALGR(K)?$")>;
+def : InstRW<[FXU], (instregex "ALR(K)?$")>;
+def : InstRW<[FXU], (instregex "AR(K)?$")>;
+def : InstRW<[FXU], (instregex "A(L)?HHHR$")>;
+def : InstRW<[FXU, FXU, Lat3], (instregex "A(L)?HHLR$")>;
+def : InstRW<[FXU], (instregex "ALSIH(N)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "A(L)?G(SI)?$")>;
+
+// Logical addition with carry
+def : InstRW<[FXU, LSU, Lat7, GroupAlone], (instregex "ALC(G)?$")>;
+def : InstRW<[FXU, Lat3, GroupAlone], (instregex "ALC(G)?R$")>;
+
+// Add with sign extension (32 -> 64)
+def : InstRW<[FXU, FXU, LSU, Lat6, GroupAlone], (instregex "AGF$")>;
+def : InstRW<[FXU, FXU, Lat2, GroupAlone], (instregex "AGFR$")>;
+
+//===----------------------------------------------------------------------===//
+// Subtraction
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "S(G|Y)?$")>;
+def : InstRW<[FXU, FXU, LSU, Lat6, GroupAlone], (instregex "SH(Y)?$")>;
+def : InstRW<[FXU], (instregex "SGR(K)?$")>;
+def : InstRW<[FXU], (instregex "SLFI$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "SL(G|GF|Y)?$")>;
+def : InstRW<[FXU], (instregex "SLGF(I|R)$")>;
+def : InstRW<[FXU], (instregex "SLGR(K)?$")>;
+def : InstRW<[FXU], (instregex "SLR(K)?$")>;
+def : InstRW<[FXU], (instregex "SR(K)?$")>;
+def : InstRW<[FXU], (instregex "S(L)?HHHR$")>;
+def : InstRW<[FXU, FXU, Lat3], (instregex "S(L)?HHLR$")>;
+
+// Subtraction with borrow
+def : InstRW<[FXU, LSU, Lat7, GroupAlone], (instregex "SLB(G)?$")>;
+def : InstRW<[FXU, Lat3, GroupAlone], (instregex "SLB(G)?R$")>;
+
+// Subtraction with sign extension (32 -> 64)
+def : InstRW<[FXU, FXU, LSU, Lat6, GroupAlone], (instregex "SGF$")>;
+def : InstRW<[FXU, FXU, Lat2, GroupAlone], (instregex "SGFR$")>;
+
+//===----------------------------------------------------------------------===//
+// AND
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "N(G|Y)?$")>;
+def : InstRW<[FXU], (instregex "NGR(K)?$")>;
+def : InstRW<[FXU], (instregex "NI(FMux|HMux|LMux)$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "NI(Y)?$")>;
+def : InstRW<[FXU], (instregex "NIHF(64)?$")>;
+def : InstRW<[FXU], (instregex "NIHH(64)?$")>;
+def : InstRW<[FXU], (instregex "NIHL(64)?$")>;
+def : InstRW<[FXU], (instregex "NILF(64)?$")>;
+def : InstRW<[FXU], (instregex "NILH(64)?$")>;
+def : InstRW<[FXU], (instregex "NILL(64)?$")>;
+def : InstRW<[FXU], (instregex "NR(K)?$")>;
+def : InstRW<[LSU, LSU, FXU, Lat9, GroupAlone], (instregex "NC$")>;
+
+//===----------------------------------------------------------------------===//
+// OR
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "O(G|Y)?$")>;
+def : InstRW<[FXU], (instregex "OGR(K)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "OI(Y)?$")>;
+def : InstRW<[FXU], (instregex "OI(FMux|HMux|LMux)$")>;
+def : InstRW<[FXU], (instregex "OIHF(64)?$")>;
+def : InstRW<[FXU], (instregex "OIHH(64)?$")>;
+def : InstRW<[FXU], (instregex "OIHL(64)?$")>;
+def : InstRW<[FXU], (instregex "OILF(64)?$")>;
+def : InstRW<[FXU], (instregex "OILH(64)?$")>;
+def : InstRW<[FXU], (instregex "OILL(64)?$")>;
+def : InstRW<[FXU], (instregex "OR(K)?$")>;
+def : InstRW<[LSU, LSU, FXU, Lat9, GroupAlone], (instregex "OC$")>;
+
+//===----------------------------------------------------------------------===//
+// XOR
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "X(G|Y)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "XI(Y)?$")>;
+def : InstRW<[FXU], (instregex "XIFMux$")>;
+def : InstRW<[FXU], (instregex "XGR(K)?$")>;
+def : InstRW<[FXU], (instregex "XIHF(64)?$")>;
+def : InstRW<[FXU], (instregex "XILF(64)?$")>;
+def : InstRW<[FXU], (instregex "XR(K)?$")>;
+def : InstRW<[LSU, LSU, FXU, Lat9, GroupAlone], (instregex "XC$")>;
+
+//===----------------------------------------------------------------------===//
+// Multiplication
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat10], (instregex "MS(GF|Y)?$")>;
+def : InstRW<[FXU, Lat6], (instregex "MS(R|FI)$")>;
+def : InstRW<[FXU, LSU, Lat12], (instregex "MSG$")>;
+def : InstRW<[FXU, Lat8], (instregex "MSGR$")>;
+def : InstRW<[FXU, Lat6], (instregex "MSGF(I|R)$")>;
+def : InstRW<[FXU, LSU, Lat15, GroupAlone], (instregex "MLG$")>;
+def : InstRW<[FXU, Lat9, GroupAlone], (instregex "MLGR$")>;
+def : InstRW<[FXU, Lat5], (instregex "MGHI$")>;
+def : InstRW<[FXU, Lat5], (instregex "MHI$")>;
+def : InstRW<[FXU, LSU, Lat9], (instregex "MH(Y)?$")>;
+def : InstRW<[FXU, Lat7, GroupAlone], (instregex "M(L)?R$")>;
+def : InstRW<[FXU, LSU, Lat7, GroupAlone], (instregex "M(FY|L)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Division and remainder
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FPU2, FPU2, FXU, FXU, FXU, FXU, FXU, Lat30, GroupAlone],
+ (instregex "DR$")>;
+def : InstRW<[FPU2, FPU2, LSU, FXU, FXU, FXU, FXU, Lat30, GroupAlone],
+ (instregex "D$")>;
+def : InstRW<[FPU2, FPU2, FXU, FXU, FXU, FXU, Lat30, GroupAlone],
+ (instregex "DSG(F)?R$")>;
+def : InstRW<[FPU2, FPU2, LSU, FXU, FXU, FXU, Lat30, GroupAlone],
+ (instregex "DSG(F)?$")>;
+def : InstRW<[FPU2, FPU2, FXU, FXU, FXU, FXU, FXU, Lat30, GroupAlone],
+ (instregex "DL(G)?R$")>;
+def : InstRW<[FPU2, FPU2, LSU, FXU, FXU, FXU, FXU, Lat30, GroupAlone],
+ (instregex "DL(G)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Shifts
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU], (instregex "SLL(G|K)?$")>;
+def : InstRW<[FXU], (instregex "SRL(G|K)?$")>;
+def : InstRW<[FXU], (instregex "SRA(G|K)?$")>;
+def : InstRW<[FXU, Lat2], (instregex "SLA(G|K)?$")>;
+def : InstRW<[FXU, FXU, FXU, FXU, Lat8], (instregex "S(L|R)D(A|L)$")>;
+
+// Rotate
+def : InstRW<[FXU, LSU, Lat6], (instregex "RLL(G)?$")>;
+
+// Rotate and insert
+def : InstRW<[FXU], (instregex "RISBG(32)?$")>;
+def : InstRW<[FXU], (instregex "RISBH(G|H|L)$")>;
+def : InstRW<[FXU], (instregex "RISBL(G|H|L)$")>;
+def : InstRW<[FXU], (instregex "RISBMux$")>;
+
+// Rotate and Select
+def : InstRW<[FXU, FXU, Lat3, GroupAlone], (instregex "R(N|O|X)SBG$")>;
+
+//===----------------------------------------------------------------------===//
+// Comparison
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "C(G|Y|Mux|RL)?$")>;
+def : InstRW<[FXU], (instregex "C(F|H)I(Mux)?$")>;
+def : InstRW<[FXU], (instregex "CG(F|H)I$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "CG(HSI|RL)$")>;
+def : InstRW<[FXU], (instregex "C(G)?R$")>;
+def : InstRW<[FXU], (instregex "CIH$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "CH(F|SI)$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "CL(Y|Mux|FHSI)?$")>;
+def : InstRW<[FXU], (instregex "CLFI(Mux)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "CLG(HRL|HSI)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "CLGF(RL)?$")>;
+def : InstRW<[FXU], (instregex "CLGF(I|R)$")>;
+def : InstRW<[FXU], (instregex "CLGR$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "CLGRL$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "CLH(F|RL|HSI)$")>;
+def : InstRW<[FXU], (instregex "CLIH$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "CLI(Y)?$")>;
+def : InstRW<[FXU], (instregex "CLR$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "CLRL$")>;
+def : InstRW<[FXU], (instregex "C(L)?HHR$")>;
+def : InstRW<[FXU, FXU, Lat3], (instregex "C(L)?HLR$")>;
+
+// Compare halfword
+def : InstRW<[FXU, LSU, FXU, Lat6, GroupAlone], (instregex "CH(Y|RL)?$")>;
+def : InstRW<[FXU, LSU, FXU, Lat6, GroupAlone], (instregex "CGH(RL)?$")>;
+def : InstRW<[FXU, LSU, FXU, Lat6, GroupAlone], (instregex "CHHSI$")>;
+
+// Compare with sign extension (32 -> 64)
+def : InstRW<[FXU, FXU, LSU, Lat6, Lat2, GroupAlone], (instregex "CGF(RL)?$")>;
+def : InstRW<[FXU, FXU, Lat2, GroupAlone], (instregex "CGFR$")>;
+
+// Compare logical character
+def : InstRW<[LSU, LSU, FXU, Lat9, GroupAlone], (instregex "CLC$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CLCL(E|U)?$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CLST$")>;
+
+// Test under mask
+def : InstRW<[FXU, LSU, Lat5], (instregex "TM(Y)?$")>;
+def : InstRW<[FXU], (instregex "TM(H|L)Mux$")>;
+def : InstRW<[FXU], (instregex "TMHH(64)?$")>;
+def : InstRW<[FXU], (instregex "TMHL(64)?$")>;
+def : InstRW<[FXU], (instregex "TMLH(64)?$")>;
+def : InstRW<[FXU], (instregex "TMLL(64)?$")>;
+
+// Compare logical characters under mask
+def : InstRW<[FXU, LSU, Lat5], (instregex "CLM(H|Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Prefetch
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSU, GroupAlone], (instregex "PFD(RL)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Atomic operations
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSU, EndGroup], (instregex "Serialize$")>;
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "LAA(G)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LAAL(G)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LAN(G)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LAO(G)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LAX(G)?$")>;
+
+// Test and set
+def : InstRW<[FXU, LSU, Lat5, EndGroup], (instregex "TS$")>;
+
+// Compare and swap
+def : InstRW<[FXU, LSU, FXU, Lat6, GroupAlone], (instregex "CS(G|Y)?$")>;
+
+// Compare double and swap
+def : InstRW<[FXU, FXU, FXU, FXU, FXU, LSU, Lat10, GroupAlone],
+ (instregex "CDS(Y)?$")>;
+def : InstRW<[FXU, FXU, FXU, FXU, FXU, FXU, LSU, LSU, Lat12, GroupAlone],
+ (instregex "CDSG$")>;
+
+// Compare and swap and store
+def : InstRW<[FXU, Lat30, GroupAlone], (instregex "CSST$")>;
+
+// Perform locked operation
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "PLO$")>;
+
+// Load/store pair from/to quadword
+def : InstRW<[LSU, LSU, Lat5, GroupAlone], (instregex "LPQ$")>;
+def : InstRW<[FXU, FXU, LSU, LSU, Lat6, GroupAlone], (instregex "STPQ$")>;
+
+// Load pair disjoint
+def : InstRW<[LSU, LSU, Lat5, GroupAlone], (instregex "LPD(G)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Translate and convert
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30, GroupAlone], (instregex "TR(T|TR)?(E|EOpt)?$")>;
+def : InstRW<[FXU, Lat30, GroupAlone], (instregex "TR(T|O)(T|O)(Opt)?$")>;
+def : InstRW<[FXU, Lat30, GroupAlone], (instregex "CU(12|14|21|24|41|42)(Opt)?$")>;
+def : InstRW<[FXU, Lat30, GroupAlone], (instregex "(CUUTF|CUTFU)(Opt)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Message-security assist
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30, GroupAlone], (instregex "KM(C|F|O|CTR)?$")>;
+def : InstRW<[FXU, Lat30, GroupAlone], (instregex "(KIMD|KLMD|KMAC|PCC)$")>;
+
+//===----------------------------------------------------------------------===//
+// Decimal arithmetic
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, DFU, LSU, Lat30, GroupAlone], (instregex "CVB(Y|G)?$")>;
+def : InstRW<[FXU, DFU, FXU, Lat30, GroupAlone], (instregex "CVD(Y|G)?$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MV(N|Z|O)$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "(PACK|PKA|PKU)$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UNPK(A|U)?$")>;
+
+def : InstRW<[FXU, FXU, DFU2, LSU, LSU, LSU, LSU, Lat15, GroupAlone],
+ (instregex "(A|S|ZA)P$")>;
+def : InstRW<[FXU, FXU, DFU2, LSU, LSU, LSU, LSU, Lat30, GroupAlone],
+ (instregex "(M|D)P$")>;
+def : InstRW<[FXU, FXU, DFU2, LSU, LSU, Lat15, GroupAlone],
+ (instregex "SRP$")>;
+def : InstRW<[DFU2, LSU, LSU, LSU, LSU, Lat11, GroupAlone], (instregex "CP$")>;
+def : InstRW<[DFU2, LSU, LSU, Lat3, GroupAlone], (instregex "TP$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "ED(MK)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Access registers
+//===----------------------------------------------------------------------===//
+
+// Extract/set/copy access register
+def : InstRW<[LSU], (instregex "(EAR|SAR|CPYA)$")>;
+
+// Load address extended
+def : InstRW<[LSU, FXU, Lat5, GroupAlone], (instregex "LAE(Y)?$")>;
+
+// Load/store access multiple (not modeled precisely)
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "(L|ST)AM(Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Program mask and addressing mode
+//===----------------------------------------------------------------------===//
+
+// Insert Program Mask
+def : InstRW<[FXU, Lat3, EndGroup], (instregex "IPM$")>;
+
+// Set Program Mask
+def : InstRW<[LSU, EndGroup], (instregex "SPM$")>;
+
+// Branch and link
+def : InstRW<[FXU, FXU, LSU, Lat8, GroupAlone], (instregex "BAL(R)?$")>;
+
+// Test addressing mode
+def : InstRW<[FXU], (instregex "TAM$")>;
+
+// Set addressing mode
+def : InstRW<[LSU, EndGroup], (instregex "SAM(24|31|64)$")>;
+
+// Branch (and save) and set mode.
+def : InstRW<[FXU, LSU, Lat5, GroupAlone], (instregex "BSM$")>;
+def : InstRW<[FXU, FXU, LSU, Lat6, GroupAlone], (instregex "BASSM$")>;
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//===----------------------------------------------------------------------===//
+
+// Find leftmost one
+def : InstRW<[FXU, Lat7, GroupAlone], (instregex "FLOGR$")>;
+
+// Population count
+def : InstRW<[FXU, Lat3], (instregex "POPCNT$")>;
+
+// Extend
+def : InstRW<[FXU], (instregex "AEXT128_64$")>;
+def : InstRW<[FXU], (instregex "ZEXT128_(32|64)$")>;
+
+// String instructions
+def : InstRW<[FXU, LSU, Lat30], (instregex "SRST$")>;
+def : InstRW<[LSU, Lat30], (instregex "SRSTU$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CUSE$")>;
+
+// Various complex instructions
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CFC$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UPT$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CKSM$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CMPSC$")>;
+
+// Execute
+def : InstRW<[LSU, GroupAlone], (instregex "EX(RL)?$")>;
+
+//===----------------------------------------------------------------------===//
+// .insn directive instructions
+//===----------------------------------------------------------------------===//
+
+// An "empty" sched-class will be assigned instead of the "invalid sched-class".
+// getNumDecoderSlots() will then return 1 instead of 0.
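+// (These pseudos back assembler input written with the .insn directive,
+// e.g. ".insn rre,0xb9040000,%r1,%r2", which assembles an RRE-format
+// instruction.)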
+def : InstRW<[], (instregex "Insn.*")>;
+
+
+// ----------------------------- Floating point ----------------------------- //
+
+//===----------------------------------------------------------------------===//
+// FP: Select instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU], (instregex "SelectF(32|64|128)$")>;
+def : InstRW<[FXU], (instregex "CondStoreF32(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStoreF64(Inv)?$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load zero
+def : InstRW<[FXU], (instregex "LZ(DR|ER)$")>;
+def : InstRW<[FXU, FXU, Lat2, GroupAlone], (instregex "LZXR$")>;
+
+// Load
+def : InstRW<[FXU], (instregex "LER$")>;
+def : InstRW<[FXU], (instregex "LD(R|R32|GR)$")>;
+def : InstRW<[FXU, Lat3], (instregex "LGDR$")>;
+def : InstRW<[FXU, FXU, Lat2, GroupAlone], (instregex "LXR$")>;
+
+// Load and Test
+def : InstRW<[FPU], (instregex "LT(D|E)BR$")>;
+def : InstRW<[FPU], (instregex "LTEBRCompare(_VecPseudo)?$")>;
+def : InstRW<[FPU], (instregex "LTDBRCompare(_VecPseudo)?$")>;
+def : InstRW<[FPU2, FPU2, Lat9, GroupAlone], (instregex "LTXBR$")>;
+def : InstRW<[FPU2, FPU2, Lat9, GroupAlone],
+ (instregex "LTXBRCompare(_VecPseudo)?$")>;
+
+// Copy sign
+def : InstRW<[FXU, FXU, Lat5, GroupAlone], (instregex "CPSDRd(d|s)$")>;
+def : InstRW<[FXU, FXU, Lat5, GroupAlone], (instregex "CPSDRs(d|s)$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Load instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSU], (instregex "LE(Y)?$")>;
+def : InstRW<[LSU], (instregex "LD(Y|E32)?$")>;
+def : InstRW<[LSU], (instregex "LX$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Store instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat7], (instregex "STD(Y)?$")>;
+def : InstRW<[FXU, LSU, Lat7], (instregex "STE(Y)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "STX$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[FPU], (instregex "LEDBR(A)?$")>;
+def : InstRW<[FPU, FPU, Lat20], (instregex "LEXBR(A)?$")>;
+def : InstRW<[FPU, FPU, Lat20], (instregex "LDXBR(A)?$")>;
+
+// Load lengthened
+def : InstRW<[FPU, LSU, Lat12], (instregex "LDEB$")>;
+def : InstRW<[FPU], (instregex "LDEBR$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "LX(D|E)B$")>;
+def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "LX(D|E)BR$")>;
+
+// Convert from fixed / logical
+def : InstRW<[FXU, FPU, Lat9, GroupAlone], (instregex "CE(F|G)BR(A)?$")>;
+def : InstRW<[FXU, FPU, Lat9, GroupAlone], (instregex "CD(F|G)BR(A)?$")>;
+def : InstRW<[FXU, FPU2, FPU2, Lat11, GroupAlone], (instregex "CX(F|G)BR(A)?$")>;
+def : InstRW<[FXU, FPU, Lat9, GroupAlone], (instregex "CEL(F|G)BR$")>;
+def : InstRW<[FXU, FPU, Lat9, GroupAlone], (instregex "CDL(F|G)BR$")>;
+def : InstRW<[FXU, FPU2, FPU2, Lat11, GroupAlone], (instregex "CXL(F|G)BR$")>;
+
+// Convert to fixed / logical
+def : InstRW<[FXU, FPU, Lat12, GroupAlone], (instregex "CF(E|D)BR(A)?$")>;
+def : InstRW<[FXU, FPU, Lat12, GroupAlone], (instregex "CG(E|D)BR(A)?$")>;
+def : InstRW<[FXU, FPU, FPU, Lat20, GroupAlone], (instregex "C(F|G)XBR(A)?$")>;
+def : InstRW<[FXU, FPU, Lat11, GroupAlone], (instregex "CLF(E|D)BR$")>;
+def : InstRW<[FXU, FPU, Lat11, GroupAlone], (instregex "CLG(E|D)BR$")>;
+def : InstRW<[FXU, FPU, FPU, Lat20, GroupAlone], (instregex "CL(F|G)XBR$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load Complement / Negative / Positive
+def : InstRW<[FPU], (instregex "L(C|N|P)DBR$")>;
+def : InstRW<[FPU], (instregex "L(C|N|P)EBR$")>;
+def : InstRW<[FXU], (instregex "LCDFR(_32)?$")>;
+def : InstRW<[FXU], (instregex "LNDFR(_32)?$")>;
+def : InstRW<[FXU], (instregex "LPDFR(_32)?$")>;
+def : InstRW<[FPU2, FPU2, Lat9, GroupAlone], (instregex "L(C|N|P)XBR$")>;
+
+// Square root
+def : InstRW<[FPU, LSU, Lat30], (instregex "SQ(E|D)B$")>;
+def : InstRW<[FPU, Lat30], (instregex "SQ(E|D)BR$")>;
+def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "SQXBR$")>;
+
+// Load FP integer
+def : InstRW<[FPU], (instregex "FIEBR(A)?$")>;
+def : InstRW<[FPU], (instregex "FIDBR(A)?$")>;
+def : InstRW<[FPU2, FPU2, Lat15, GroupAlone], (instregex "FIXBR(A)?$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[FPU, LSU, Lat12], (instregex "A(E|D)B$")>;
+def : InstRW<[FPU], (instregex "A(E|D)BR$")>;
+def : InstRW<[FPU2, FPU2, Lat20, GroupAlone], (instregex "AXBR$")>;
+
+// Subtraction
+def : InstRW<[FPU, LSU, Lat12], (instregex "S(E|D)B$")>;
+def : InstRW<[FPU], (instregex "S(E|D)BR$")>;
+def : InstRW<[FPU2, FPU2, Lat20, GroupAlone], (instregex "SXBR$")>;
+
+// Multiply
+def : InstRW<[FPU, LSU, Lat12], (instregex "M(D|DE|EE)B$")>;
+def : InstRW<[FPU], (instregex "M(D|DE|EE)BR$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "MXDB$")>;
+def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MXDBR$")>;
+def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "MXBR$")>;
+
+// Multiply and add / subtract
+def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)EB$")>;
+def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)EBR$")>;
+def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)DB$")>;
+def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)DBR$")>;
+
+// Division
+def : InstRW<[FPU, LSU, Lat30], (instregex "D(E|D)B$")>;
+def : InstRW<[FPU, Lat30], (instregex "D(E|D)BR$")>;
+def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "DXBR$")>;
+
+// Divide to integer
+def : InstRW<[FPU, Lat30, GroupAlone], (instregex "DI(E|D)BR$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[FPU, LSU, Lat12], (instregex "(K|C)(E|D)B$")>;
+def : InstRW<[FPU], (instregex "(K|C)(E|D)BR$")>;
+def : InstRW<[FPU, FPU, Lat30], (instregex "(K|C)XBR$")>;
+
+// Test Data Class
+def : InstRW<[FPU, LSU, Lat15], (instregex "TC(E|D)B$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "TCXB$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Floating-point control register instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat4, GroupAlone], (instregex "EFPC$")>;
+def : InstRW<[LSU, Lat3, GroupAlone], (instregex "SFPC$")>;
+def : InstRW<[LSU, LSU, Lat6, GroupAlone], (instregex "LFPC$")>;
+def : InstRW<[LSU, Lat3, GroupAlone], (instregex "STFPC$")>;
+def : InstRW<[FXU, Lat30, GroupAlone], (instregex "SFASR$")>;
+def : InstRW<[FXU, LSU, Lat30, GroupAlone], (instregex "LFAS$")>;
+def : InstRW<[FXU, Lat2, GroupAlone], (instregex "SRNM(B|T)?$")>;
+
+
+// --------------------- Hexadecimal floating point ------------------------- //
+
+//===----------------------------------------------------------------------===//
+// HFP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load and Test
+def : InstRW<[FPU], (instregex "LT(D|E)R$")>;
+def : InstRW<[FPU2, FPU2, Lat9, GroupAlone], (instregex "LTXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[FPU], (instregex "(LEDR|LRER)$")>;
+def : InstRW<[FPU], (instregex "LEXR$")>;
+def : InstRW<[FPU], (instregex "(LDXR|LRDR)$")>;
+
+// Load lengthened
+def : InstRW<[LSU], (instregex "LDE$")>;
+def : InstRW<[FXU], (instregex "LDER$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "LX(D|E)$")>;
+def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "LX(D|E)R$")>;
+
+// Convert from fixed
+def : InstRW<[FXU, FPU, Lat9, GroupAlone], (instregex "CE(F|G)R$")>;
+def : InstRW<[FXU, FPU, Lat9, GroupAlone], (instregex "CD(F|G)R$")>;
+def : InstRW<[FXU, FPU2, FPU2, Lat11, GroupAlone], (instregex "CX(F|G)R$")>;
+
+// Convert to fixed
+def : InstRW<[FXU, FPU, Lat12, GroupAlone], (instregex "CF(E|D)R$")>;
+def : InstRW<[FXU, FPU, Lat12, GroupAlone], (instregex "CG(E|D)R$")>;
+def : InstRW<[FXU, FPU, FPU, Lat20, GroupAlone], (instregex "C(F|G)XR$")>;
+
+// Convert BFP to HFP / HFP to BFP.
+def : InstRW<[FPU], (instregex "THD(E)?R$")>;
+def : InstRW<[FPU], (instregex "TB(E)?DR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load Complement / Negative / Positive
+def : InstRW<[FPU], (instregex "L(C|N|P)DR$")>;
+def : InstRW<[FPU], (instregex "L(C|N|P)ER$")>;
+def : InstRW<[FPU2, FPU2, Lat9, GroupAlone], (instregex "L(C|N|P)XR$")>;
+
+// Halve
+def : InstRW<[FPU], (instregex "H(E|D)R$")>;
+
+// Square root
+def : InstRW<[FPU, LSU, Lat30], (instregex "SQ(E|D)$")>;
+def : InstRW<[FPU, Lat30], (instregex "SQ(E|D)R$")>;
+def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "SQXR$")>;
+
+// Load FP integer
+def : InstRW<[FPU], (instregex "FIER$")>;
+def : InstRW<[FPU], (instregex "FIDR$")>;
+def : InstRW<[FPU2, FPU2, Lat15, GroupAlone], (instregex "FIXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[FPU, LSU, Lat12], (instregex "A(E|D|U|W)$")>;
+def : InstRW<[FPU], (instregex "A(E|D|U|W)R$")>;
+def : InstRW<[FPU2, FPU2, Lat20, GroupAlone], (instregex "AXR$")>;
+
+// Subtraction
+def : InstRW<[FPU, LSU, Lat12], (instregex "S(E|D|U|W)$")>;
+def : InstRW<[FPU], (instregex "S(E|D|U|W)R$")>;
+def : InstRW<[FPU2, FPU2, Lat20, GroupAlone], (instregex "SXR$")>;
+
+// Multiply
+def : InstRW<[FPU, LSU, Lat12], (instregex "M(D|DE|E|EE)$")>;
+def : InstRW<[FPU], (instregex "M(D|DE|E|EE)R$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "MXD$")>;
+def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MXDR$")>;
+def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "MXR$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "MY(H|L)?$")>;
+def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MY(H|L)?R$")>;
+
+// Multiply and add / subtract
+def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)E$")>;
+def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)ER$")>;
+def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)D$")>;
+def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)DR$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat12, GroupAlone], (instregex "MAY(H|L)?$")>;
+def : InstRW<[FPU2, FPU2, GroupAlone], (instregex "MAY(H|L)?R$")>;
+
+// Division
+def : InstRW<[FPU, LSU, Lat30], (instregex "D(E|D)$")>;
+def : InstRW<[FPU, Lat30], (instregex "D(E|D)R$")>;
+def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "DXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[FPU, LSU, Lat12], (instregex "C(E|D)$")>;
+def : InstRW<[FPU], (instregex "C(E|D)R$")>;
+def : InstRW<[FPU, FPU, Lat15], (instregex "CXR$")>;
+
+
+// ------------------------ Decimal floating point -------------------------- //
+
+//===----------------------------------------------------------------------===//
+// DFP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load and Test
+def : InstRW<[DFU, Lat20], (instregex "LTDTR$")>;
+def : InstRW<[DFU2, DFU2, Lat20, GroupAlone], (instregex "LTXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[DFU, Lat30], (instregex "LEDTR$")>;
+def : InstRW<[DFU, DFU, Lat30], (instregex "LDXTR$")>;
+
+// Load lengthened
+def : InstRW<[DFU, Lat20], (instregex "LDETR$")>;
+def : InstRW<[DFU2, DFU2, Lat20, GroupAlone], (instregex "LXDTR$")>;
+
+// Convert from fixed / logical
+def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "CD(F|G)TR(A)?$")>;
+def : InstRW<[FXU, DFU2, DFU2, Lat30, GroupAlone], (instregex "CX(F|G)TR(A)?$")>;
+def : InstRW<[FXU, DFU, Lat11, GroupAlone], (instregex "CDL(F|G)TR$")>;
+def : InstRW<[FXU, DFU2, DFU2, Lat11, GroupAlone], (instregex "CXL(F|G)TR$")>;
+
+// Convert to fixed / logical
+def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "C(F|G)DTR(A)?$")>;
+def : InstRW<[FXU, DFU, DFU, Lat30, GroupAlone], (instregex "C(F|G)XTR(A)?$")>;
+def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "CL(F|G)DTR$")>;
+def : InstRW<[FXU, DFU, DFU, Lat30, GroupAlone], (instregex "CL(F|G)XTR$")>;
+
+// Convert from / to signed / unsigned packed
+def : InstRW<[FXU, DFU, Lat12, GroupAlone], (instregex "CD(S|U)TR$")>;
+def : InstRW<[FXU, FXU, DFU2, DFU2, Lat20, GroupAlone], (instregex "CX(S|U)TR$")>;
+def : InstRW<[FXU, DFU, Lat12, GroupAlone], (instregex "C(S|U)DTR$")>;
+def : InstRW<[FXU, FXU, DFU2, DFU2, Lat20, GroupAlone], (instregex "C(S|U)XTR$")>;
+
+// Perform floating-point operation
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "PFPO$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load FP integer
+def : InstRW<[DFU, Lat20], (instregex "FIDTR$")>;
+def : InstRW<[DFU2, DFU2, Lat20, GroupAlone], (instregex "FIXTR$")>;
+
+// Extract biased exponent
+def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "EEDTR$")>;
+def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "EEXTR$")>;
+
+// Extract significance
+def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "ESDTR$")>;
+def : InstRW<[FXU, DFU, DFU, Lat20, GroupAlone], (instregex "ESXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[DFU, Lat30], (instregex "ADTR(A)?$")>;
+def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "AXTR(A)?$")>;
+
+// Subtraction
+def : InstRW<[DFU, Lat30], (instregex "SDTR(A)?$")>;
+def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "SXTR(A)?$")>;
+
+// Multiply
+def : InstRW<[DFU, Lat30], (instregex "MDTR(A)?$")>;
+def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "MXTR(A)?$")>;
+
+// Division
+def : InstRW<[DFU, Lat30], (instregex "DDTR(A)?$")>;
+def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "DXTR(A)?$")>;
+
+// Quantize
+def : InstRW<[DFU, Lat30], (instregex "QADTR$")>;
+def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "QAXTR$")>;
+
+// Reround
+def : InstRW<[FXU, DFU, Lat30], (instregex "RRDTR$")>;
+def : InstRW<[FXU, DFU2, DFU2, Lat30, GroupAlone], (instregex "RRXTR$")>;
+
+// Shift significand left/right
+def : InstRW<[LSU, DFU, Lat11], (instregex "S(L|R)DT$")>;
+def : InstRW<[LSU, DFU2, DFU2, Lat15, GroupAlone], (instregex "S(L|R)XT$")>;
+
+// Insert biased exponent
+def : InstRW<[FXU, DFU, Lat11], (instregex "IEDTR$")>;
+def : InstRW<[FXU, DFU2, DFU2, Lat15, GroupAlone], (instregex "IEXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[DFU, Lat11], (instregex "(K|C)DTR$")>;
+def : InstRW<[DFU, DFU, Lat15, GroupAlone], (instregex "(K|C)XTR$")>;
+
+// Compare biased exponent
+def : InstRW<[DFU, Lat8], (instregex "CEDTR$")>;
+def : InstRW<[DFU, Lat9], (instregex "CEXTR$")>;
+
+// Test Data Class/Group
+def : InstRW<[LSU, DFU, Lat15], (instregex "TD(C|G)(E|D)T$")>;
+def : InstRW<[LSU, DFU2, DFU2, Lat15, GroupAlone], (instregex "TD(C|G)XT$")>;
+
+
+// -------------------------------- System ---------------------------------- //
+
+//===----------------------------------------------------------------------===//
+// System: Program-Status Word Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30], (instregex "EPSW$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "LPSW(E)?$")>;
+def : InstRW<[FXU, Lat3], (instregex "IPK$")>;
+def : InstRW<[LSU], (instregex "SPKA$")>;
+def : InstRW<[LSU], (instregex "SSM$")>;
+def : InstRW<[FXU], (instregex "ST(N|O)SM$")>;
+def : InstRW<[FXU, Lat3], (instregex "IAC$")>;
+def : InstRW<[LSU], (instregex "SAC(F)?$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Control Register Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "LCTL(G)?$")>;
+def : InstRW<[LSU, Lat30], (instregex "STCT(L|G)$")>;
+def : InstRW<[LSU], (instregex "E(P|S)A(I)?R$")>;
+def : InstRW<[FXU, Lat30], (instregex "SSA(I)?R$")>;
+def : InstRW<[FXU, Lat30], (instregex "ESEA$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Prefix-Register Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "SPX$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STPX$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Storage-Key and Real Memory Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30], (instregex "ISKE$")>;
+def : InstRW<[FXU, Lat30], (instregex "IVSK$")>;
+def : InstRW<[FXU, Lat30], (instregex "SSKE(Opt)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "RRB(E|M)$")>;
+def : InstRW<[FXU, Lat30], (instregex "PFMF$")>;
+def : InstRW<[FXU, Lat30], (instregex "TB$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "PGIN$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "PGOUT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Dynamic-Address-Translation Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "IPTE(Opt)?(Opt)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "IDTE(Opt)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "PTLB$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "CSP(G)?$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "LPTEA$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "LRA(Y|G)?$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STRAG$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "LURA(G)?$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STUR(A|G)$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "TPROT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Memory-move Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSU, Lat8, GroupAlone], (instregex "MVC(K|P|S)$")>;
+def : InstRW<[LSU, Lat6, GroupAlone], (instregex "MVC(S|D)K$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "MVCOS$")>;
+def : InstRW<[LSU, Lat30], (instregex "MVPG$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Address-Space Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "LASP$")>;
+def : InstRW<[LSU], (instregex "PALB$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "PC$")>;
+def : InstRW<[FXU, Lat30], (instregex "PR$")>;
+def : InstRW<[FXU, Lat30], (instregex "PT(I)?$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "RP$")>;
+def : InstRW<[FXU, Lat30], (instregex "BS(G|A)$")>;
+def : InstRW<[FXU, Lat20], (instregex "TAR$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Linkage-Stack Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "BAKR$")>;
+def : InstRW<[FXU, Lat30], (instregex "EREG(G)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "(E|M)STA$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Time-Related Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30], (instregex "PTFF$")>;
+def : InstRW<[FXU, LSU, Lat20], (instregex "SCK$")>;
+def : InstRW<[FXU, Lat30], (instregex "SCKPF$")>;
+def : InstRW<[FXU, LSU, Lat20], (instregex "SCKC$")>;
+def : InstRW<[FXU, LSU, Lat20], (instregex "SPT$")>;
+def : InstRW<[FXU, LSU, Lat15], (instregex "STCK$")>;
+def : InstRW<[FXU, LSU, Lat12], (instregex "STCKF$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STCKE$")>;
+def : InstRW<[FXU, LSU, Lat9], (instregex "STCKC$")>;
+def : InstRW<[FXU, LSU, Lat8], (instregex "STPT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: CPU-Related Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "STAP$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STIDP$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STSI$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STFL(E)?$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "ECAG$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "ECTG$")>;
+def : InstRW<[FXU, Lat30], (instregex "PTF$")>;
+def : InstRW<[FXU, Lat30], (instregex "PCKMO$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Miscellaneous Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30], (instregex "SVC$")>;
+def : InstRW<[FXU], (instregex "MC$")>;
+def : InstRW<[FXU, Lat30], (instregex "DIAG$")>;
+def : InstRW<[FXU], (instregex "TRAC(E|G)$")>;
+def : InstRW<[FXU, Lat30], (instregex "TRAP(2|4)$")>;
+def : InstRW<[FXU, Lat30], (instregex "SIGP$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "SIGA$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "SIE$")>;
+
+//===----------------------------------------------------------------------===//
+// System: CPU-Measurement Facility Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU], (instregex "LPP$")>;
+def : InstRW<[FXU, Lat30], (instregex "ECPGA$")>;
+def : InstRW<[FXU, Lat30], (instregex "E(C|P)CTR$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "L(C|P|S)CTL$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "Q(S|CTR)I$")>;
+def : InstRW<[FXU, Lat30], (instregex "S(C|P)CTR$")>;
+
+//===----------------------------------------------------------------------===//
+// System: I/O Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30], (instregex "(C|H|R|X)SCH$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "(M|S|ST|T)SCH$")>;
+def : InstRW<[FXU, Lat30], (instregex "RCHP$")>;
+def : InstRW<[FXU, Lat30], (instregex "SCHM$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STC(PS|RW)$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "TPI$")>;
+def : InstRW<[FXU, Lat30], (instregex "SAL$")>;
+
+}
+
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
new file mode 100644
index 000000000000..76b378454631
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
@@ -0,0 +1,1241 @@
+//=- SystemZScheduleZEC12.td - SystemZ Scheduling Definitions --*- tblgen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for ZEC12 to support instruction
+// scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+def ZEC12Model : SchedMachineModel {
+
+ let UnsupportedFeatures = Arch10UnsupportedFeatures.List;
+
+ let IssueWidth = 5;
+ let MicroOpBufferSize = 40; // Issue queues
+ let LoadLatency = 1; // Optimistic load latency.
+
+ let PostRAScheduler = 1;
+
+ // Extra cycles for a mispredicted branch.
+ let MispredictPenalty = 16;
+}
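+
+// For context (a sketch, not part of this file): the model is attached to a
+// processor in SystemZ.td with a ProcessorModel definition along these lines:
+//   def : ProcessorModel<"zEC12", ZEC12Model, Arch10SupportedFeatures.List>;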
+
+let SchedModel = ZEC12Model in {
+
+// These definitions could be put in a common subtarget include file,
+// but it seems the include system in TableGen currently rejects
+// multiple includes of the same file.
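+// GroupAlone marks a SchedWrite that both begins and ends a decoder group,
+// i.e. the instruction is dispatched in a group of its own; EndGroup merely
+// closes the current group. Neither contributes any micro-ops of its own.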
+def : WriteRes<GroupAlone, []> {
+ let NumMicroOps = 0;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
+def : WriteRes<EndGroup, []> {
+ let NumMicroOps = 0;
+ let EndGroup = 1;
+}
+def : WriteRes<Lat2, []> { let Latency = 2; let NumMicroOps = 0;}
+def : WriteRes<Lat3, []> { let Latency = 3; let NumMicroOps = 0;}
+def : WriteRes<Lat4, []> { let Latency = 4; let NumMicroOps = 0;}
+def : WriteRes<Lat5, []> { let Latency = 5; let NumMicroOps = 0;}
+def : WriteRes<Lat6, []> { let Latency = 6; let NumMicroOps = 0;}
+def : WriteRes<Lat7, []> { let Latency = 7; let NumMicroOps = 0;}
+def : WriteRes<Lat8, []> { let Latency = 8; let NumMicroOps = 0;}
+def : WriteRes<Lat9, []> { let Latency = 9; let NumMicroOps = 0;}
+def : WriteRes<Lat10, []> { let Latency = 10; let NumMicroOps = 0;}
+def : WriteRes<Lat11, []> { let Latency = 11; let NumMicroOps = 0;}
+def : WriteRes<Lat12, []> { let Latency = 12; let NumMicroOps = 0;}
+def : WriteRes<Lat15, []> { let Latency = 15; let NumMicroOps = 0;}
+def : WriteRes<Lat20, []> { let Latency = 20; let NumMicroOps = 0;}
+def : WriteRes<Lat30, []> { let Latency = 30; let NumMicroOps = 0;}
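+
+// The LatN entries above are latency-only helpers: they use no micro-ops and
+// no execution units, and are listed in InstRW entries to raise a scheduling
+// class's latency beyond the per-unit defaults defined below.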
+
+// Execution units.
+def ZEC12_FXUnit : ProcResource<2>;
+def ZEC12_LSUnit : ProcResource<2>;
+def ZEC12_FPUnit : ProcResource<1>;
+def ZEC12_DFUnit : ProcResource<1>;
+def ZEC12_VBUnit : ProcResource<1>;
+
+// Subtarget specific definitions of scheduling resources.
+def : WriteRes<FXU, [ZEC12_FXUnit]> { let Latency = 1; }
+def : WriteRes<LSU, [ZEC12_LSUnit]> { let Latency = 4; }
+def : WriteRes<LSU_lat1, [ZEC12_LSUnit]> { let Latency = 1; }
+def : WriteRes<FPU, [ZEC12_FPUnit]> { let Latency = 8; }
+def : WriteRes<FPU2, [ZEC12_FPUnit, ZEC12_FPUnit]> { let Latency = 9; }
+def : WriteRes<DFU, [ZEC12_DFUnit]> { let Latency = 2; }
+def : WriteRes<DFU2, [ZEC12_DFUnit, ZEC12_DFUnit]> { let Latency = 3; }
+def : WriteRes<VBU, [ZEC12_VBUnit]>; // Virtual Branching Unit
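+// FPU2 and DFU2 list the (single) floating-point / decimal unit twice,
+// modeling operations that keep that unit busy for an extra cycle.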
+
+// -------------------------- INSTRUCTIONS ---------------------------------- //
+
+// InstRW constructs are used (rather than attaching SchedWrites directly to
+// the instruction definitions) in order to preserve the readability of the
+// InstrInfo files.
+
+// For each instruction, as matched by a regexp, provide a list of
+// resources that it needs. These will be combined into a SchedClass.
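+//
+// For example, the "Addition" entry further down,
+//   def : InstRW<[FXU, LSU, Lat5], (instregex "A(L)?(Y|SI)?$")>;
+// gives A, AL, AY, ALY, ASI and ALSI one FXU micro-op and one LSU micro-op,
+// with an overall latency of 5 cycles.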
+
+//===----------------------------------------------------------------------===//
+// Stack allocation
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU], (instregex "ADJDYNALLOC$")>; // Pseudo -> LA / LAY
+
+//===----------------------------------------------------------------------===//
+// Branch instructions
+//===----------------------------------------------------------------------===//
+
+// Branch
+def : InstRW<[VBU], (instregex "(Call)?BRC(L)?(Asm.*)?$")>;
+def : InstRW<[VBU], (instregex "(Call)?J(G)?(Asm.*)?$")>;
+def : InstRW<[LSU, Lat4], (instregex "(Call)?BC(R)?(Asm.*)?$")>;
+def : InstRW<[LSU, Lat4], (instregex "(Call)?B(R)?(Asm.*)?$")>;
+def : InstRW<[FXU, EndGroup], (instregex "BRCT(G)?$")>;
+def : InstRW<[FXU, LSU, Lat5, GroupAlone], (instregex "BRCTH$")>;
+def : InstRW<[FXU, LSU, Lat5, GroupAlone], (instregex "BCT(G)?(R)?$")>;
+def : InstRW<[FXU, FXU, FXU, LSU, Lat7, GroupAlone],
+ (instregex "B(R)?X(H|L).*$")>;
+
+// Compare and branch
+def : InstRW<[FXU], (instregex "C(L)?(G)?(I|R)J(Asm.*)?$")>;
+def : InstRW<[FXU, LSU, Lat5, GroupAlone],
+ (instregex "C(L)?(G)?(I|R)B(Call|Return|Asm.*)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Trap instructions
+//===----------------------------------------------------------------------===//
+
+// Trap
+def : InstRW<[VBU], (instregex "(Cond)?Trap$")>;
+
+// Compare and trap
+def : InstRW<[FXU], (instregex "C(G)?(I|R)T(Asm.*)?$")>;
+def : InstRW<[FXU], (instregex "CL(G)?RT(Asm.*)?$")>;
+def : InstRW<[FXU], (instregex "CL(F|G)IT(Asm.*)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "CL(G)?T(Asm.*)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Call and return instructions
+//===----------------------------------------------------------------------===//
+
+// Call
+def : InstRW<[VBU, FXU, FXU, Lat3, GroupAlone], (instregex "(Call)?BRAS$")>;
+def : InstRW<[FXU, FXU, LSU, Lat6, GroupAlone], (instregex "(Call)?BRASL$")>;
+def : InstRW<[FXU, FXU, LSU, Lat6, GroupAlone], (instregex "(Call)?BAS(R)?$")>;
+def : InstRW<[FXU, FXU, LSU, Lat6, GroupAlone], (instregex "TLS_(G|L)DCALL$")>;
+
+// Return
+def : InstRW<[LSU_lat1, EndGroup], (instregex "Return$")>;
+def : InstRW<[LSU_lat1], (instregex "CondReturn$")>;
+
+//===----------------------------------------------------------------------===//
+// Select instructions
+//===----------------------------------------------------------------------===//
+
+// Select pseudo
+def : InstRW<[FXU], (instregex "Select(32|64|32Mux)$")>;
+
+// CondStore pseudos
+def : InstRW<[FXU], (instregex "CondStore16(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStore16Mux(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStore32(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStore64(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStore8(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStore8Mux(Inv)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Move instructions
+//===----------------------------------------------------------------------===//
+
+// Moves
+def : InstRW<[FXU, LSU, Lat5], (instregex "MV(G|H)?HI$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "MVI(Y)?$")>;
+
+// Move character
+def : InstRW<[LSU, LSU, LSU, FXU, Lat8, GroupAlone], (instregex "MVC$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVCL(E|U)?$")>;
+
+// Pseudo -> reg move
+def : InstRW<[FXU], (instregex "COPY(_TO_REGCLASS)?$")>;
+def : InstRW<[FXU], (instregex "EXTRACT_SUBREG$")>;
+def : InstRW<[FXU], (instregex "INSERT_SUBREG$")>;
+def : InstRW<[FXU], (instregex "REG_SEQUENCE$")>;
+def : InstRW<[FXU], (instregex "SUBREG_TO_REG$")>;
+
+// Loads
+def : InstRW<[LSU], (instregex "L(Y|FH|RL|Mux)?$")>;
+def : InstRW<[LSU], (instregex "LG(RL)?$")>;
+def : InstRW<[LSU], (instregex "L128$")>;
+
+def : InstRW<[FXU], (instregex "LLIH(F|H|L)$")>;
+def : InstRW<[FXU], (instregex "LLIL(F|H|L)$")>;
+
+def : InstRW<[FXU], (instregex "LG(F|H)I$")>;
+def : InstRW<[FXU], (instregex "LHI(Mux)?$")>;
+def : InstRW<[FXU], (instregex "LR(Mux)?$")>;
+
+// Load and trap
+def : InstRW<[FXU, LSU, Lat5], (instregex "L(FH|G)?AT$")>;
+
+// Load and test
+def : InstRW<[FXU, LSU, Lat5], (instregex "LT(G)?$")>;
+def : InstRW<[FXU], (instregex "LT(G)?R$")>;
+
+// Stores
+def : InstRW<[FXU, LSU, Lat5], (instregex "STG(RL)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "ST128$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "ST(Y|FH|RL|Mux)?$")>;
+
+// String moves.
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVST$")>;
+
+//===----------------------------------------------------------------------===//
+// Conditional move instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat2], (instregex "LOC(G)?R(Asm.*)?$")>;
+def : InstRW<[FXU, LSU, Lat6], (instregex "LOC(G)?(Asm.*)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "STOC(G)?(Asm.*)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Sign extensions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU], (instregex "L(B|H|G)R$")>;
+def : InstRW<[FXU], (instregex "LG(B|H|F)R$")>;
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "LTGF$")>;
+def : InstRW<[FXU], (instregex "LTGFR$")>;
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "LB(H|Mux)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LH(Y)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LH(H|Mux|RL)$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LG(B|H|F)$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LG(H|F)RL$")>;
+
+//===----------------------------------------------------------------------===//
+// Zero extensions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU], (instregex "LLCR(Mux)?$")>;
+def : InstRW<[FXU], (instregex "LLHR(Mux)?$")>;
+def : InstRW<[FXU], (instregex "LLG(C|H|F|T)R$")>;
+def : InstRW<[LSU], (instregex "LLC(Mux)?$")>;
+def : InstRW<[LSU], (instregex "LLH(Mux)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LL(C|H)H$")>;
+def : InstRW<[LSU], (instregex "LLHRL$")>;
+def : InstRW<[LSU], (instregex "LLG(C|H|F|T|HRL|FRL)$")>;
+
+// Load and trap
+def : InstRW<[FXU, LSU, Lat5], (instregex "LLG(F|T)?AT$")>;
+
+//===----------------------------------------------------------------------===//
+// Truncations
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "STC(H|Y|Mux)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "STH(H|Y|RL|Mux)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "STCM(H|Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Multi-register moves
+//===----------------------------------------------------------------------===//
+
+// Load multiple (estimated average of 5 ops)
+def : InstRW<[LSU, LSU, LSU, LSU, LSU, Lat10, GroupAlone],
+ (instregex "LM(H|Y|G)?$")>;
+
+// Load multiple disjoint
+def : InstRW<[FXU, Lat30, GroupAlone], (instregex "LMD$")>;
+
+// Store multiple (estimated average of 3 ops)
+def : InstRW<[LSU, LSU, FXU, FXU, FXU, Lat10, GroupAlone],
+ (instregex "STM(H|Y|G)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Byte swaps
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU], (instregex "LRV(G)?R$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LRV(G|H)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "STRV(G|H)?$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVCIN$")>;
+
+//===----------------------------------------------------------------------===//
+// Load address instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU], (instregex "LA(Y|RL)?$")>;
+
+// Load the Global Offset Table address
+def : InstRW<[FXU], (instregex "GOT$")>;
+
+//===----------------------------------------------------------------------===//
+// Absolute and Negation
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat2], (instregex "LP(G)?R$")>;
+def : InstRW<[FXU, FXU, Lat3, GroupAlone], (instregex "L(N|P)GFR$")>;
+def : InstRW<[FXU, Lat2], (instregex "LN(R|GR)$")>;
+def : InstRW<[FXU], (instregex "LC(R|GR)$")>;
+def : InstRW<[FXU, FXU, Lat2, GroupAlone], (instregex "LCGFR$")>;
+
+//===----------------------------------------------------------------------===//
+// Insertion
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "IC(Y)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "IC32(Y)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "ICM(H|Y)?$")>;
+def : InstRW<[FXU], (instregex "II(F|H|L)Mux$")>;
+def : InstRW<[FXU], (instregex "IIHF(64)?$")>;
+def : InstRW<[FXU], (instregex "IIHH(64)?$")>;
+def : InstRW<[FXU], (instregex "IIHL(64)?$")>;
+def : InstRW<[FXU], (instregex "IILF(64)?$")>;
+def : InstRW<[FXU], (instregex "IILH(64)?$")>;
+def : InstRW<[FXU], (instregex "IILL(64)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Addition
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "A(L)?(Y|SI)?$")>;
+def : InstRW<[FXU, LSU, Lat6], (instregex "AH(Y)?$")>;
+def : InstRW<[FXU], (instregex "AIH$")>;
+def : InstRW<[FXU], (instregex "AFI(Mux)?$")>;
+def : InstRW<[FXU], (instregex "AGFI$")>;
+def : InstRW<[FXU], (instregex "AGHI(K)?$")>;
+def : InstRW<[FXU], (instregex "AGR(K)?$")>;
+def : InstRW<[FXU], (instregex "AHI(K)?$")>;
+def : InstRW<[FXU], (instregex "AHIMux(K)?$")>;
+def : InstRW<[FXU], (instregex "AL(FI|HSIK)$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "ALGF$")>;
+def : InstRW<[FXU], (instregex "ALGHSIK$")>;
+def : InstRW<[FXU], (instregex "ALGF(I|R)$")>;
+def : InstRW<[FXU], (instregex "ALGR(K)?$")>;
+def : InstRW<[FXU], (instregex "ALR(K)?$")>;
+def : InstRW<[FXU], (instregex "AR(K)?$")>;
+def : InstRW<[FXU], (instregex "A(L)?HHHR$")>;
+def : InstRW<[FXU, Lat2], (instregex "A(L)?HHLR$")>;
+def : InstRW<[FXU], (instregex "ALSIH(N)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "A(L)?G(SI)?$")>;
+
+// Logical addition with carry
+def : InstRW<[FXU, LSU, Lat7, GroupAlone], (instregex "ALC(G)?$")>;
+def : InstRW<[FXU, Lat3, GroupAlone], (instregex "ALC(G)?R$")>;
+
+// Add with sign extension (32 -> 64)
+def : InstRW<[FXU, LSU, Lat6], (instregex "AGF$")>;
+def : InstRW<[FXU, Lat2], (instregex "AGFR$")>;
+
+//===----------------------------------------------------------------------===//
+// Subtraction
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "S(G|Y)?$")>;
+def : InstRW<[FXU, LSU, Lat6], (instregex "SH(Y)?$")>;
+def : InstRW<[FXU], (instregex "SGR(K)?$")>;
+def : InstRW<[FXU], (instregex "SLFI$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "SL(G|GF|Y)?$")>;
+def : InstRW<[FXU], (instregex "SLGF(I|R)$")>;
+def : InstRW<[FXU], (instregex "SLGR(K)?$")>;
+def : InstRW<[FXU], (instregex "SLR(K)?$")>;
+def : InstRW<[FXU], (instregex "SR(K)?$")>;
+def : InstRW<[FXU], (instregex "S(L)?HHHR$")>;
+def : InstRW<[FXU, Lat2], (instregex "S(L)?HHLR$")>;
+
+// Subtraction with borrow
+def : InstRW<[FXU, LSU, Lat7, GroupAlone], (instregex "SLB(G)?$")>;
+def : InstRW<[FXU, Lat3, GroupAlone], (instregex "SLB(G)?R$")>;
+
+// Subtraction with sign extension (32 -> 64)
+def : InstRW<[FXU, LSU, Lat6], (instregex "SGF$")>;
+def : InstRW<[FXU, Lat2], (instregex "SGFR$")>;
+
+//===----------------------------------------------------------------------===//
+// AND
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "N(G|Y)?$")>;
+def : InstRW<[FXU], (instregex "NGR(K)?$")>;
+def : InstRW<[FXU], (instregex "NI(FMux|HMux|LMux)$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "NI(Y)?$")>;
+def : InstRW<[FXU], (instregex "NIHF(64)?$")>;
+def : InstRW<[FXU], (instregex "NIHH(64)?$")>;
+def : InstRW<[FXU], (instregex "NIHL(64)?$")>;
+def : InstRW<[FXU], (instregex "NILF(64)?$")>;
+def : InstRW<[FXU], (instregex "NILH(64)?$")>;
+def : InstRW<[FXU], (instregex "NILL(64)?$")>;
+def : InstRW<[FXU], (instregex "NR(K)?$")>;
+def : InstRW<[LSU, LSU, FXU, Lat9, GroupAlone], (instregex "NC$")>;
+
+//===----------------------------------------------------------------------===//
+// OR
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "O(G|Y)?$")>;
+def : InstRW<[FXU], (instregex "OGR(K)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "OI(Y)?$")>;
+def : InstRW<[FXU], (instregex "OI(FMux|HMux|LMux)$")>;
+def : InstRW<[FXU], (instregex "OIHF(64)?$")>;
+def : InstRW<[FXU], (instregex "OIHH(64)?$")>;
+def : InstRW<[FXU], (instregex "OIHL(64)?$")>;
+def : InstRW<[FXU], (instregex "OILF(64)?$")>;
+def : InstRW<[FXU], (instregex "OILH(64)?$")>;
+def : InstRW<[FXU], (instregex "OILL(64)?$")>;
+def : InstRW<[FXU], (instregex "OR(K)?$")>;
+def : InstRW<[LSU, LSU, FXU, Lat9, GroupAlone], (instregex "OC$")>;
+
+//===----------------------------------------------------------------------===//
+// XOR
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "X(G|Y)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "XI(Y)?$")>;
+def : InstRW<[FXU], (instregex "XIFMux$")>;
+def : InstRW<[FXU], (instregex "XGR(K)?$")>;
+def : InstRW<[FXU], (instregex "XIHF(64)?$")>;
+def : InstRW<[FXU], (instregex "XILF(64)?$")>;
+def : InstRW<[FXU], (instregex "XR(K)?$")>;
+def : InstRW<[LSU, LSU, FXU, Lat9, GroupAlone], (instregex "XC$")>;
+
+//===----------------------------------------------------------------------===//
+// Multiplication
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat10], (instregex "MS(GF|Y)?$")>;
+def : InstRW<[FXU, Lat6], (instregex "MS(R|FI)$")>;
+def : InstRW<[FXU, LSU, Lat12], (instregex "MSG$")>;
+def : InstRW<[FXU, Lat8], (instregex "MSGR$")>;
+def : InstRW<[FXU, Lat6], (instregex "MSGF(I|R)$")>;
+def : InstRW<[FXU, LSU, Lat15, GroupAlone], (instregex "MLG$")>;
+def : InstRW<[FXU, Lat9, GroupAlone], (instregex "MLGR$")>;
+def : InstRW<[FXU, Lat5], (instregex "MGHI$")>;
+def : InstRW<[FXU, Lat5], (instregex "MHI$")>;
+def : InstRW<[FXU, LSU, Lat9], (instregex "MH(Y)?$")>;
+def : InstRW<[FXU, Lat7, GroupAlone], (instregex "M(L)?R$")>;
+def : InstRW<[FXU, LSU, Lat7, GroupAlone], (instregex "M(FY|L)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Division and remainder
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FPU2, FPU2, FXU, FXU, FXU, FXU, FXU, Lat30, GroupAlone],
+ (instregex "DR$")>;
+def : InstRW<[FPU2, FPU2, LSU, FXU, FXU, FXU, FXU, Lat30, GroupAlone],
+ (instregex "D$")>;
+def : InstRW<[FPU2, FPU2, FXU, FXU, FXU, FXU, Lat30, GroupAlone],
+ (instregex "DSG(F)?R$")>;
+def : InstRW<[FPU2, FPU2, LSU, FXU, FXU, FXU, Lat30, GroupAlone],
+ (instregex "DSG(F)?$")>;
+def : InstRW<[FPU2, FPU2, FXU, FXU, FXU, FXU, FXU, Lat30, GroupAlone],
+ (instregex "DL(G)?R$")>;
+def : InstRW<[FPU2, FPU2, LSU, FXU, FXU, FXU, FXU, Lat30, GroupAlone],
+ (instregex "DL(G)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Shifts
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU], (instregex "SLL(G|K)?$")>;
+def : InstRW<[FXU], (instregex "SRL(G|K)?$")>;
+def : InstRW<[FXU], (instregex "SRA(G|K)?$")>;
+def : InstRW<[FXU], (instregex "SLA(G|K)?$")>;
+def : InstRW<[FXU, FXU, FXU, FXU, Lat8], (instregex "S(L|R)D(A|L)$")>;
+
+// Rotate
+def : InstRW<[FXU, LSU, Lat6], (instregex "RLL(G)?$")>;
+
+// Rotate and insert
+def : InstRW<[FXU], (instregex "RISBG(N|32)?$")>;
+def : InstRW<[FXU], (instregex "RISBH(G|H|L)$")>;
+def : InstRW<[FXU], (instregex "RISBL(G|H|L)$")>;
+def : InstRW<[FXU], (instregex "RISBMux$")>;
+
+// Rotate and Select
+def : InstRW<[FXU, FXU, Lat3, GroupAlone], (instregex "R(N|O|X)SBG$")>;
+
+//===----------------------------------------------------------------------===//
+// Comparison
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "C(G|Y|Mux|RL)?$")>;
+def : InstRW<[FXU], (instregex "C(F|H)I(Mux)?$")>;
+def : InstRW<[FXU], (instregex "CG(F|H)I$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "CG(HSI|RL)$")>;
+def : InstRW<[FXU], (instregex "C(G)?R$")>;
+def : InstRW<[FXU], (instregex "CIH$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "CH(F|SI)$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "CL(Y|Mux|FHSI)?$")>;
+def : InstRW<[FXU], (instregex "CLFI(Mux)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "CLG(HRL|HSI)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "CLGF(RL)?$")>;
+def : InstRW<[FXU], (instregex "CLGF(I|R)$")>;
+def : InstRW<[FXU], (instregex "CLGR$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "CLGRL$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "CLH(F|RL|HSI)$")>;
+def : InstRW<[FXU], (instregex "CLIH$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "CLI(Y)?$")>;
+def : InstRW<[FXU], (instregex "CLR$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "CLRL$")>;
+def : InstRW<[FXU], (instregex "C(L)?HHR$")>;
+def : InstRW<[FXU, Lat2], (instregex "C(L)?HLR$")>;
+
+// Compare halfword
+def : InstRW<[FXU, LSU, Lat6], (instregex "CH(Y|RL)?$")>;
+def : InstRW<[FXU, LSU, Lat6], (instregex "CGH(RL)?$")>;
+def : InstRW<[FXU, FXU, LSU, Lat6, GroupAlone], (instregex "CHHSI$")>;
+
+// Compare with sign extension (32 -> 64)
+def : InstRW<[FXU, LSU, Lat6], (instregex "CGF(RL)?$")>;
+def : InstRW<[FXU, Lat2], (instregex "CGFR$")>;
+
+// Compare logical character
+def : InstRW<[FXU, LSU, LSU, Lat9, GroupAlone], (instregex "CLC$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CLCL(E|U)?$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CLST$")>;
+
+// Test under mask
+def : InstRW<[FXU, LSU, Lat5], (instregex "TM(Y)?$")>;
+def : InstRW<[FXU], (instregex "TM(H|L)Mux$")>;
+def : InstRW<[FXU], (instregex "TMHH(64)?$")>;
+def : InstRW<[FXU], (instregex "TMHL(64)?$")>;
+def : InstRW<[FXU], (instregex "TMLH(64)?$")>;
+def : InstRW<[FXU], (instregex "TMLL(64)?$")>;
+
+// Compare logical characters under mask
+def : InstRW<[FXU, LSU, Lat5], (instregex "CLM(H|Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Prefetch and execution hint
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSU], (instregex "PFD(RL)?$")>;
+def : InstRW<[LSU], (instregex "BP(R)?P$")>;
+def : InstRW<[FXU], (instregex "NIAI$")>;
+
+//===----------------------------------------------------------------------===//
+// Atomic operations
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSU, EndGroup], (instregex "Serialize$")>;
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "LAA(G)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LAAL(G)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LAN(G)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LAO(G)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LAX(G)?$")>;
+
+// Test and set
+def : InstRW<[FXU, LSU, Lat5, EndGroup], (instregex "TS$")>;
+
+// Compare and swap
+def : InstRW<[FXU, FXU, LSU, Lat6, GroupAlone], (instregex "CS(G|Y)?$")>;
+
+// Compare double and swap
+def : InstRW<[FXU, FXU, FXU, FXU, FXU, LSU, Lat10, GroupAlone],
+ (instregex "CDS(Y)?$")>;
+def : InstRW<[FXU, FXU, FXU, FXU, FXU, FXU, LSU, LSU, Lat12, GroupAlone],
+ (instregex "CDSG$")>;
+
+// Compare and swap and store
+def : InstRW<[FXU, Lat30, GroupAlone], (instregex "CSST$")>;
+
+// Perform locked operation
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "PLO$")>;
+
+// Load/store pair from/to quadword
+def : InstRW<[LSU, LSU, Lat5, GroupAlone], (instregex "LPQ$")>;
+def : InstRW<[FXU, FXU, LSU, LSU, Lat6, GroupAlone], (instregex "STPQ$")>;
+
+// Load pair disjoint
+def : InstRW<[LSU, LSU, Lat5, GroupAlone], (instregex "LPD(G)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Translate and convert
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30, GroupAlone], (instregex "TR(T|TR)?(E|EOpt)?$")>;
+def : InstRW<[FXU, Lat30, GroupAlone], (instregex "TR(T|O)(T|O)(Opt)?$")>;
+def : InstRW<[FXU, Lat30, GroupAlone], (instregex "CU(12|14|21|24|41|42)(Opt)?$")>;
+def : InstRW<[FXU, Lat30, GroupAlone], (instregex "(CUUTF|CUTFU)(Opt)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Message-security assist
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30, GroupAlone], (instregex "KM(C|F|O|CTR)?$")>;
+def : InstRW<[FXU, Lat30, GroupAlone], (instregex "(KIMD|KLMD|KMAC|PCC)$")>;
+
+//===----------------------------------------------------------------------===//
+// Decimal arithmetic
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, DFU, LSU, Lat30, GroupAlone], (instregex "CVB(Y|G)?$")>;
+def : InstRW<[FXU, DFU, FXU, Lat30, GroupAlone], (instregex "CVD(Y|G)?$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MV(N|Z|O)$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "(PACK|PKA|PKU)$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UNPK(A|U)?$")>;
+
+def : InstRW<[FXU, FXU, DFU2, LSU, LSU, LSU, LSU, Lat15, GroupAlone],
+ (instregex "(A|S|ZA)P$")>;
+def : InstRW<[FXU, FXU, DFU2, LSU, LSU, LSU, LSU, Lat30, GroupAlone],
+ (instregex "(M|D)P$")>;
+def : InstRW<[FXU, FXU, DFU2, LSU, LSU, Lat15, GroupAlone],
+ (instregex "SRP$")>;
+def : InstRW<[DFU2, LSU, LSU, LSU, LSU, Lat11, GroupAlone], (instregex "CP$")>;
+def : InstRW<[DFU2, LSU, LSU, Lat3, GroupAlone], (instregex "TP$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "ED(MK)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Access registers
+//===----------------------------------------------------------------------===//
+
+// Extract/set/copy access register
+def : InstRW<[LSU], (instregex "(EAR|SAR|CPYA)$")>;
+
+// Load address extended
+def : InstRW<[LSU, FXU, Lat5, GroupAlone], (instregex "LAE(Y)?$")>;
+
+// Load/store access multiple (not modeled precisely)
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "(L|ST)AM(Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Program mask and addressing mode
+//===----------------------------------------------------------------------===//
+
+// Insert Program Mask
+def : InstRW<[FXU, Lat3, EndGroup], (instregex "IPM$")>;
+
+// Set Program Mask
+def : InstRW<[LSU, EndGroup], (instregex "SPM$")>;
+
+// Branch and link
+def : InstRW<[FXU, FXU, LSU, Lat8, GroupAlone], (instregex "BAL(R)?$")>;
+
+// Test addressing mode
+def : InstRW<[FXU], (instregex "TAM$")>;
+
+// Set addressing mode
+def : InstRW<[LSU, EndGroup], (instregex "SAM(24|31|64)$")>;
+
+// Branch (and save) and set mode.
+def : InstRW<[FXU, LSU, Lat5, GroupAlone], (instregex "BSM$")>;
+def : InstRW<[FXU, FXU, LSU, Lat6, GroupAlone], (instregex "BASSM$")>;
+
+//===----------------------------------------------------------------------===//
+// Transactional execution
+//===----------------------------------------------------------------------===//
+
+// Transaction begin
+def : InstRW<[LSU, LSU, FXU, FXU, FXU, FXU, FXU, Lat15, GroupAlone],
+ (instregex "TBEGIN(C|_nofloat)?$")>;
+
+// Transaction end
+def : InstRW<[LSU, GroupAlone], (instregex "TEND$")>;
+
+// Transaction abort
+def : InstRW<[LSU, GroupAlone], (instregex "TABORT$")>;
+
+// Extract Transaction Nesting Depth
+def : InstRW<[FXU], (instregex "ETND$")>;
+
+// Nontransactional store
+def : InstRW<[FXU, LSU, Lat5], (instregex "NTSTG$")>;
+
+//===----------------------------------------------------------------------===//
+// Processor assist
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU], (instregex "PPA$")>;
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//===----------------------------------------------------------------------===//
+
+// Find leftmost one
+def : InstRW<[FXU, Lat7, GroupAlone], (instregex "FLOGR$")>;
+
+// Population count
+def : InstRW<[FXU, Lat3], (instregex "POPCNT$")>;
+
+// Extend
+def : InstRW<[FXU], (instregex "AEXT128_64$")>;
+def : InstRW<[FXU], (instregex "ZEXT128_(32|64)$")>;
+
+// String instructions
+def : InstRW<[FXU, LSU, Lat30], (instregex "SRST$")>;
+def : InstRW<[LSU, Lat30], (instregex "SRSTU$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CUSE$")>;
+
+// Various complex instructions
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CFC$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UPT$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CKSM$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CMPSC$")>;
+
+// Execute
+def : InstRW<[LSU, GroupAlone], (instregex "EX(RL)?$")>;
+
+//===----------------------------------------------------------------------===//
+// .insn directive instructions
+//===----------------------------------------------------------------------===//
+
+// An "empty" sched-class will be assigned instead of the "invalid sched-class".
+// getNumDecoderSlots() will then return 1 instead of 0.
+def : InstRW<[], (instregex "Insn.*")>;
+
+
+// ----------------------------- Floating point ----------------------------- //
+
+//===----------------------------------------------------------------------===//
+// FP: Select instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU], (instregex "SelectF(32|64|128)$")>;
+def : InstRW<[FXU], (instregex "CondStoreF32(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStoreF64(Inv)?$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load zero
+def : InstRW<[FXU], (instregex "LZ(DR|ER)$")>;
+def : InstRW<[FXU, FXU, Lat2, GroupAlone], (instregex "LZXR$")>;
+
+// Load
+def : InstRW<[FXU], (instregex "LER$")>;
+def : InstRW<[FXU], (instregex "LD(R|R32|GR)$")>;
+def : InstRW<[FXU, Lat3], (instregex "LGDR$")>;
+def : InstRW<[FXU, FXU, Lat2, GroupAlone], (instregex "LXR$")>;
+
+// Load and Test
+def : InstRW<[FPU], (instregex "LT(D|E)BR$")>;
+def : InstRW<[FPU], (instregex "LTEBRCompare(_VecPseudo)?$")>;
+def : InstRW<[FPU], (instregex "LTDBRCompare(_VecPseudo)?$")>;
+def : InstRW<[FPU2, FPU2, Lat9, GroupAlone], (instregex "LTXBR$")>;
+def : InstRW<[FPU2, FPU2, Lat9, GroupAlone],
+ (instregex "LTXBRCompare(_VecPseudo)?$")>;
+
+// Copy sign
+def : InstRW<[FXU, FXU, Lat5, GroupAlone], (instregex "CPSDRd(d|s)$")>;
+def : InstRW<[FXU, FXU, Lat5, GroupAlone], (instregex "CPSDRs(d|s)$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Load instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSU], (instregex "LE(Y)?$")>;
+def : InstRW<[LSU], (instregex "LD(Y|E32)?$")>;
+def : InstRW<[LSU], (instregex "LX$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Store instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat7], (instregex "STD(Y)?$")>;
+def : InstRW<[FXU, LSU, Lat7], (instregex "STE(Y)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "STX$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[FPU], (instregex "LEDBR(A)?$")>;
+def : InstRW<[FPU, FPU, Lat20], (instregex "LEXBR(A)?$")>;
+def : InstRW<[FPU, FPU, Lat20], (instregex "LDXBR(A)?$")>;
+
+// Load lengthened
+def : InstRW<[FPU, LSU, Lat12], (instregex "LDEB$")>;
+def : InstRW<[FPU], (instregex "LDEBR$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "LX(D|E)B$")>;
+def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "LX(D|E)BR$")>;
+
+// Convert from fixed / logical
+def : InstRW<[FXU, FPU, Lat9, GroupAlone], (instregex "CE(F|G)BR(A?)$")>;
+def : InstRW<[FXU, FPU, Lat9, GroupAlone], (instregex "CD(F|G)BR(A?)$")>;
+def : InstRW<[FXU, FPU2, FPU2, Lat11, GroupAlone], (instregex "CX(F|G)BR(A?)$")>;
+def : InstRW<[FXU, FPU, Lat9, GroupAlone], (instregex "CEL(F|G)BR$")>;
+def : InstRW<[FXU, FPU, Lat9, GroupAlone], (instregex "CDL(F|G)BR$")>;
+def : InstRW<[FXU, FPU2, FPU2, Lat11, GroupAlone], (instregex "CXL(F|G)BR$")>;
+
+// Convert to fixed / logical
+def : InstRW<[FXU, FPU, Lat12, GroupAlone], (instregex "CF(E|D)BR(A?)$")>;
+def : InstRW<[FXU, FPU, Lat12, GroupAlone], (instregex "CG(E|D)BR(A?)$")>;
+def : InstRW<[FXU, FPU, FPU, Lat20, GroupAlone], (instregex "C(F|G)XBR(A?)$")>;
+def : InstRW<[FXU, FPU, Lat11, GroupAlone], (instregex "CLF(E|D)BR$")>;
+def : InstRW<[FXU, FPU, Lat11, GroupAlone], (instregex "CLG(E|D)BR$")>;
+def : InstRW<[FXU, FPU, FPU, Lat20, GroupAlone], (instregex "CL(F|G)XBR$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load Complement / Negative / Positive
+def : InstRW<[FPU], (instregex "L(C|N|P)DBR$")>;
+def : InstRW<[FPU], (instregex "L(C|N|P)EBR$")>;
+def : InstRW<[FXU], (instregex "LCDFR(_32)?$")>;
+def : InstRW<[FXU], (instregex "LNDFR(_32)?$")>;
+def : InstRW<[FXU], (instregex "LPDFR(_32)?$")>;
+def : InstRW<[FPU2, FPU2, Lat9, GroupAlone], (instregex "L(C|N|P)XBR$")>;
+
+// Square root
+def : InstRW<[FPU, LSU, Lat30], (instregex "SQ(E|D)B$")>;
+def : InstRW<[FPU, Lat30], (instregex "SQ(E|D)BR$")>;
+def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "SQXBR$")>;
+
+// Load FP integer
+def : InstRW<[FPU], (instregex "FIEBR(A)?$")>;
+def : InstRW<[FPU], (instregex "FIDBR(A)?$")>;
+def : InstRW<[FPU2, FPU2, Lat15, GroupAlone], (instregex "FIXBR(A)?$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[FPU, LSU, Lat12], (instregex "A(E|D)B$")>;
+def : InstRW<[FPU], (instregex "A(E|D)BR$")>;
+def : InstRW<[FPU2, FPU2, Lat20, GroupAlone], (instregex "AXBR$")>;
+
+// Subtraction
+def : InstRW<[FPU, LSU, Lat12], (instregex "S(E|D)B$")>;
+def : InstRW<[FPU], (instregex "S(E|D)BR$")>;
+def : InstRW<[FPU2, FPU2, Lat20, GroupAlone], (instregex "SXBR$")>;
+
+// Multiply
+def : InstRW<[FPU, LSU, Lat12], (instregex "M(D|DE|EE)B$")>;
+def : InstRW<[FPU], (instregex "M(D|DE|EE)BR$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "MXDB$")>;
+def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MXDBR$")>;
+def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "MXBR$")>;
+
+// Multiply and add / subtract
+def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)EB$")>;
+def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)EBR$")>;
+def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)DB$")>;
+def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)DBR$")>;
+
+// Division
+def : InstRW<[FPU, LSU, Lat30], (instregex "D(E|D)B$")>;
+def : InstRW<[FPU, Lat30], (instregex "D(E|D)BR$")>;
+def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "DXBR$")>;
+
+// Divide to integer
+def : InstRW<[FPU, Lat30, GroupAlone], (instregex "DI(E|D)BR$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[FPU, LSU, Lat12], (instregex "(K|C)(E|D)B$")>;
+def : InstRW<[FPU], (instregex "(K|C)(E|D)BR$")>;
+def : InstRW<[FPU, FPU, Lat30], (instregex "(K|C)XBR$")>;
+
+// Test Data Class
+def : InstRW<[FPU, LSU, Lat15], (instregex "TC(E|D)B$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "TCXB$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Floating-point control register instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat4, GroupAlone], (instregex "EFPC$")>;
+def : InstRW<[LSU, Lat3, GroupAlone], (instregex "SFPC$")>;
+def : InstRW<[LSU, LSU, Lat6, GroupAlone], (instregex "LFPC$")>;
+def : InstRW<[LSU, Lat3, GroupAlone], (instregex "STFPC$")>;
+def : InstRW<[FXU, Lat30, GroupAlone], (instregex "SFASR$")>;
+def : InstRW<[FXU, LSU, Lat30, GroupAlone], (instregex "LFAS$")>;
+def : InstRW<[FXU, Lat2, GroupAlone], (instregex "SRNM(B|T)?$")>;
+
+
+// --------------------- Hexadecimal floating point ------------------------- //
+
+//===----------------------------------------------------------------------===//
+// HFP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load and Test
+def : InstRW<[FPU], (instregex "LT(D|E)R$")>;
+def : InstRW<[FPU2, FPU2, Lat9, GroupAlone], (instregex "LTXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[FPU], (instregex "(LEDR|LRER)$")>;
+def : InstRW<[FPU], (instregex "LEXR$")>;
+def : InstRW<[FPU], (instregex "(LDXR|LRDR)$")>;
+
+// Load lengthened
+def : InstRW<[LSU], (instregex "LDE$")>;
+def : InstRW<[FXU], (instregex "LDER$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "LX(D|E)$")>;
+def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "LX(D|E)R$")>;
+
+// Convert from fixed
+def : InstRW<[FXU, FPU, Lat9, GroupAlone], (instregex "CE(F|G)R$")>;
+def : InstRW<[FXU, FPU, Lat9, GroupAlone], (instregex "CD(F|G)R$")>;
+def : InstRW<[FXU, FPU2, FPU2, Lat11, GroupAlone], (instregex "CX(F|G)R$")>;
+
+// Convert to fixed
+def : InstRW<[FXU, FPU, Lat12, GroupAlone], (instregex "CF(E|D)R$")>;
+def : InstRW<[FXU, FPU, Lat12, GroupAlone], (instregex "CG(E|D)R$")>;
+def : InstRW<[FXU, FPU, FPU, Lat20, GroupAlone], (instregex "C(F|G)XR$")>;
+
+// Convert BFP to HFP / HFP to BFP.
+def : InstRW<[FPU], (instregex "THD(E)?R$")>;
+def : InstRW<[FPU], (instregex "TB(E)?DR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load Complement / Negative / Positive
+def : InstRW<[FPU], (instregex "L(C|N|P)DR$")>;
+def : InstRW<[FPU], (instregex "L(C|N|P)ER$")>;
+def : InstRW<[FPU2, FPU2, Lat9, GroupAlone], (instregex "L(C|N|P)XR$")>;
+
+// Halve
+def : InstRW<[FPU], (instregex "H(E|D)R$")>;
+
+// Square root
+def : InstRW<[FPU, LSU, Lat30], (instregex "SQ(E|D)$")>;
+def : InstRW<[FPU, Lat30], (instregex "SQ(E|D)R$")>;
+def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "SQXR$")>;
+
+// Load FP integer
+def : InstRW<[FPU], (instregex "FIER$")>;
+def : InstRW<[FPU], (instregex "FIDR$")>;
+def : InstRW<[FPU2, FPU2, Lat15, GroupAlone], (instregex "FIXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[FPU, LSU, Lat12], (instregex "A(E|D|U|W)$")>;
+def : InstRW<[FPU], (instregex "A(E|D|U|W)R$")>;
+def : InstRW<[FPU2, FPU2, Lat20, GroupAlone], (instregex "AXR$")>;
+
+// Subtraction
+def : InstRW<[FPU, LSU, Lat12], (instregex "S(E|D|U|W)$")>;
+def : InstRW<[FPU], (instregex "S(E|D|U|W)R$")>;
+def : InstRW<[FPU2, FPU2, Lat20, GroupAlone], (instregex "SXR$")>;
+
+// Multiply
+def : InstRW<[FPU, LSU, Lat12], (instregex "M(D|DE|E|EE)$")>;
+def : InstRW<[FPU], (instregex "M(D|DE|E|EE)R$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "MXD$")>;
+def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MXDR$")>;
+def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "MXR$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "MY(H|L)?$")>;
+def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MY(H|L)?R$")>;
+
+// Multiply and add / subtract
+def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)E$")>;
+def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)ER$")>;
+def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)D$")>;
+def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)DR$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat12, GroupAlone], (instregex "MAY(H|L)?$")>;
+def : InstRW<[FPU2, FPU2, GroupAlone], (instregex "MAY(H|L)?R$")>;
+
+// Division
+def : InstRW<[FPU, LSU, Lat30], (instregex "D(E|D)$")>;
+def : InstRW<[FPU, Lat30], (instregex "D(E|D)R$")>;
+def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "DXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[FPU, LSU, Lat12], (instregex "C(E|D)$")>;
+def : InstRW<[FPU], (instregex "C(E|D)R$")>;
+def : InstRW<[FPU, FPU, Lat15], (instregex "CXR$")>;
+
+
+// ------------------------ Decimal floating point -------------------------- //
+
+//===----------------------------------------------------------------------===//
+// DFP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load and Test
+def : InstRW<[DFU, Lat20], (instregex "LTDTR$")>;
+def : InstRW<[DFU2, DFU2, Lat20, GroupAlone], (instregex "LTXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[DFU, Lat30], (instregex "LEDTR$")>;
+def : InstRW<[DFU, DFU, Lat30], (instregex "LDXTR$")>;
+
+// Load lengthened
+def : InstRW<[DFU, Lat20], (instregex "LDETR$")>;
+def : InstRW<[DFU2, DFU2, Lat20, GroupAlone], (instregex "LXDTR$")>;
+
+// Convert from fixed / logical
+def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "CD(F|G)TR(A)?$")>;
+def : InstRW<[FXU, DFU2, DFU2, Lat30, GroupAlone], (instregex "CX(F|G)TR(A)?$")>;
+def : InstRW<[FXU, DFU, Lat11, GroupAlone], (instregex "CDL(F|G)TR$")>;
+def : InstRW<[FXU, DFU2, DFU2, Lat11, GroupAlone], (instregex "CXL(F|G)TR$")>;
+
+// Convert to fixed / logical
+def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "C(F|G)DTR(A)?$")>;
+def : InstRW<[FXU, DFU, DFU, Lat30, GroupAlone], (instregex "C(F|G)XTR(A)?$")>;
+def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "CL(F|G)DTR$")>;
+def : InstRW<[FXU, DFU, DFU, Lat30, GroupAlone], (instregex "CL(F|G)XTR$")>;
+
+// Convert from / to signed / unsigned packed
+def : InstRW<[FXU, DFU, Lat12, GroupAlone], (instregex "CD(S|U)TR$")>;
+def : InstRW<[FXU, FXU, DFU2, DFU2, Lat20, GroupAlone], (instregex "CX(S|U)TR$")>;
+def : InstRW<[FXU, DFU, Lat12, GroupAlone], (instregex "C(S|U)DTR$")>;
+def : InstRW<[FXU, FXU, DFU2, DFU2, Lat20, GroupAlone], (instregex "C(S|U)XTR$")>;
+
+// Convert from / to zoned
+def : InstRW<[LSU, DFU2, Lat7, GroupAlone], (instregex "CDZT$")>;
+def : InstRW<[LSU, LSU, DFU2, DFU2, Lat10, GroupAlone], (instregex "CXZT$")>;
+def : InstRW<[FXU, LSU, DFU, Lat11, GroupAlone], (instregex "CZDT$")>;
+def : InstRW<[FXU, LSU, DFU, DFU, Lat15, GroupAlone], (instregex "CZXT$")>;
+
+// Perform floating-point operation
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "PFPO$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load FP integer
+def : InstRW<[DFU, Lat20], (instregex "FIDTR$")>;
+def : InstRW<[DFU2, DFU2, Lat20, GroupAlone], (instregex "FIXTR$")>;
+
+// Extract biased exponent
+def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "EEDTR$")>;
+def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "EEXTR$")>;
+
+// Extract significance
+def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "ESDTR$")>;
+def : InstRW<[FXU, DFU, DFU, Lat20, GroupAlone], (instregex "ESXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[DFU, Lat30], (instregex "ADTR(A)?$")>;
+def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "AXTR(A)?$")>;
+
+// Subtraction
+def : InstRW<[DFU, Lat30], (instregex "SDTR(A)?$")>;
+def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "SXTR(A)?$")>;
+
+// Multiply
+def : InstRW<[DFU, Lat30], (instregex "MDTR(A)?$")>;
+def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "MXTR(A)?$")>;
+
+// Division
+def : InstRW<[DFU, Lat30], (instregex "DDTR(A)?$")>;
+def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "DXTR(A)?$")>;
+
+// Quantize
+def : InstRW<[DFU, Lat30], (instregex "QADTR$")>;
+def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "QAXTR$")>;
+
+// Reround
+def : InstRW<[FXU, DFU, Lat30], (instregex "RRDTR$")>;
+def : InstRW<[FXU, DFU2, DFU2, Lat30, GroupAlone], (instregex "RRXTR$")>;
+
+// Shift significand left/right
+def : InstRW<[LSU, DFU, Lat11], (instregex "S(L|R)DT$")>;
+def : InstRW<[LSU, DFU2, DFU2, Lat15, GroupAlone], (instregex "S(L|R)XT$")>;
+
+// Insert biased exponent
+def : InstRW<[FXU, DFU, Lat11], (instregex "IEDTR$")>;
+def : InstRW<[FXU, DFU2, DFU2, Lat15, GroupAlone], (instregex "IEXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[DFU, Lat11], (instregex "(K|C)DTR$")>;
+def : InstRW<[DFU, DFU, Lat15, GroupAlone], (instregex "(K|C)XTR$")>;
+
+// Compare biased exponent
+def : InstRW<[DFU, Lat8], (instregex "CEDTR$")>;
+def : InstRW<[DFU, Lat9], (instregex "CEXTR$")>;
+
+// Test Data Class/Group
+def : InstRW<[LSU, DFU, Lat15], (instregex "TD(C|G)(E|D)T$")>;
+def : InstRW<[LSU, DFU2, DFU2, Lat15, GroupAlone], (instregex "TD(C|G)XT$")>;
+
+
+// -------------------------------- System ---------------------------------- //
+
+//===----------------------------------------------------------------------===//
+// System: Program-Status Word Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30], (instregex "EPSW$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "LPSW(E)?$")>;
+def : InstRW<[FXU, Lat3], (instregex "IPK$")>;
+def : InstRW<[LSU], (instregex "SPKA$")>;
+def : InstRW<[LSU], (instregex "SSM$")>;
+def : InstRW<[FXU], (instregex "ST(N|O)SM$")>;
+def : InstRW<[FXU, Lat3], (instregex "IAC$")>;
+def : InstRW<[LSU], (instregex "SAC(F)?$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Control Register Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "LCTL(G)?$")>;
+def : InstRW<[LSU, Lat30], (instregex "STCT(L|G)$")>;
+def : InstRW<[LSU], (instregex "E(P|S)A(I)?R$")>;
+def : InstRW<[FXU, Lat30], (instregex "SSA(I)?R$")>;
+def : InstRW<[FXU, Lat30], (instregex "ESEA$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Prefix-Register Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "SPX$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STPX$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Storage-Key and Real Memory Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30], (instregex "ISKE$")>;
+def : InstRW<[FXU, Lat30], (instregex "IVSK$")>;
+def : InstRW<[FXU, Lat30], (instregex "SSKE(Opt)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "RRB(E|M)$")>;
+def : InstRW<[FXU, Lat30], (instregex "PFMF$")>;
+def : InstRW<[FXU, Lat30], (instregex "TB$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "PGIN$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "PGOUT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Dynamic-Address-Translation Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "IPTE(Opt)?(Opt)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "IDTE(Opt)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "CRDTE(Opt)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "PTLB$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "CSP(G)?$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "LPTEA$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "LRA(Y|G)?$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STRAG$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "LURA(G)?$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STUR(A|G)$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "TPROT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Memory-move Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSU, Lat8, GroupAlone], (instregex "MVC(K|P|S)$")>;
+def : InstRW<[LSU, Lat6, GroupAlone], (instregex "MVC(S|D)K$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "MVCOS$")>;
+def : InstRW<[LSU, Lat30], (instregex "MVPG$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Address-Space Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "LASP$")>;
+def : InstRW<[LSU], (instregex "PALB$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "PC$")>;
+def : InstRW<[FXU, Lat30], (instregex "PR$")>;
+def : InstRW<[FXU, Lat30], (instregex "PT(I)?$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "RP$")>;
+def : InstRW<[FXU, Lat30], (instregex "BS(G|A)$")>;
+def : InstRW<[FXU, Lat20], (instregex "TAR$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Linkage-Stack Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "BAKR$")>;
+def : InstRW<[FXU, Lat30], (instregex "EREG(G)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "(E|M)STA$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Time-Related Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30], (instregex "PTFF$")>;
+def : InstRW<[FXU, LSU, Lat20], (instregex "SCK$")>;
+def : InstRW<[FXU, Lat30], (instregex "SCKPF$")>;
+def : InstRW<[FXU, LSU, Lat20], (instregex "SCKC$")>;
+def : InstRW<[FXU, LSU, Lat20], (instregex "SPT$")>;
+def : InstRW<[FXU, LSU, LSU, Lat9, GroupAlone], (instregex "STCK(F)?$")>;
+def : InstRW<[LSU, LSU, LSU, LSU, FXU, FXU, Lat20, GroupAlone],
+ (instregex "STCKE$")>;
+def : InstRW<[FXU, LSU, Lat9], (instregex "STCKC$")>;
+def : InstRW<[FXU, LSU, Lat8], (instregex "STPT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: CPU-Related Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "STAP$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STIDP$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STSI$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STFL(E)?$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "ECAG$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "ECTG$")>;
+def : InstRW<[FXU, Lat30], (instregex "PTF$")>;
+def : InstRW<[FXU, Lat30], (instregex "PCKMO$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Miscellaneous Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30], (instregex "SVC$")>;
+def : InstRW<[FXU], (instregex "MC$")>;
+def : InstRW<[FXU, Lat30], (instregex "DIAG$")>;
+def : InstRW<[FXU], (instregex "TRAC(E|G)$")>;
+def : InstRW<[FXU, Lat30], (instregex "TRAP(2|4)$")>;
+def : InstRW<[FXU, Lat30], (instregex "SIGP$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "SIGA$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "SIE$")>;
+
+//===----------------------------------------------------------------------===//
+// System: CPU-Measurement Facility Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU], (instregex "LPP$")>;
+def : InstRW<[FXU, Lat30], (instregex "ECPGA$")>;
+def : InstRW<[FXU, Lat30], (instregex "E(C|P)CTR$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "L(C|P|S)CTL$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "Q(S|CTR)I$")>;
+def : InstRW<[FXU, Lat30], (instregex "S(C|P)CTR$")>;
+
+//===----------------------------------------------------------------------===//
+// System: I/O Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30], (instregex "(C|H|R|X)SCH$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "(M|S|ST|T)SCH$")>;
+def : InstRW<[FXU, Lat30], (instregex "RCHP$")>;
+def : InstRW<[FXU, Lat30], (instregex "SCHM$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STC(PS|RW)$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "TPI$")>;
+def : InstRW<[FXU, Lat30], (instregex "SAL$")>;
+
+}
+
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..657482504045
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -0,0 +1,275 @@
+//===-- SystemZSelectionDAGInfo.cpp - SystemZ SelectionDAG Info -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SystemZSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZTargetMachine.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "systemz-selectiondag-info"
+
+// Decide whether it is best to use a loop or straight-line code for
+// a block operation of Size bytes with source address Src and destination
+// address Dst. Sequence is the opcode to use for straight-line code
+// (such as MVC) and Loop is the opcode to use for loops (such as MVC_LOOP).
+// Return the chain for the completed operation.
+static SDValue emitMemMem(SelectionDAG &DAG, const SDLoc &DL, unsigned Sequence,
+ unsigned Loop, SDValue Chain, SDValue Dst,
+ SDValue Src, uint64_t Size) {
+ EVT PtrVT = Src.getValueType();
+ // The heuristic we use is to prefer loops for anything that would
+ // require 7 or more MVCs. With these kinds of sizes there isn't
+ // much to choose between straight-line code and looping code,
+ // since the time will be dominated by the MVCs themselves.
+ // However, the loop has 4 or 5 instructions (depending on whether
+ // the base addresses can be proved equal), so there doesn't seem
+ // much point using a loop for 5 * 256 bytes or fewer. Anything in
+ // the range (5 * 256, 6 * 256) will need another instruction after
+ // the loop, so it doesn't seem worth using a loop then either.
+ // The next value up, 6 * 256, can be implemented in the same
+ // number of straight-line MVCs as 6 * 256 - 1.
+ if (Size > 6 * 256)
+ return DAG.getNode(Loop, DL, MVT::Other, Chain, Dst, Src,
+ DAG.getConstant(Size, DL, PtrVT),
+ DAG.getConstant(Size / 256, DL, PtrVT));
+ return DAG.getNode(Sequence, DL, MVT::Other, Chain, Dst, Src,
+ DAG.getConstant(Size, DL, PtrVT));
+}
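+
+// For example (figures illustrative): a 1537-byte copy exceeds the
+// 6 * 256 threshold, so the loop form is chosen with Size / 256 = 6
+// iterations of full 256-byte MVCs plus a residual MVC, while a
+// 1536-byte copy stays straight-line and expands to exactly six MVCs.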
+
+SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemcpy(
+ SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align, bool IsVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
+ if (IsVolatile)
+ return SDValue();
+
+ if (auto *CSize = dyn_cast<ConstantSDNode>(Size))
+ return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP,
+ Chain, Dst, Src, CSize->getZExtValue());
+ return SDValue();
+}
+
+// Handle a memset of 1, 2, 4 or 8 bytes with the operands given by
+// Chain, Dst, ByteVal and Size. These cases are expected to use
+// MVI, MVHHI, MVHI and MVGHI respectively.
+static SDValue memsetStore(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
+ SDValue Dst, uint64_t ByteVal, uint64_t Size,
+ unsigned Align, MachinePointerInfo DstPtrInfo) {
+ uint64_t StoreVal = ByteVal;
+ for (unsigned I = 1; I < Size; ++I)
+ StoreVal |= ByteVal << (I * 8);
+ return DAG.getStore(
+ Chain, DL, DAG.getConstant(StoreVal, DL, MVT::getIntegerVT(Size * 8)),
+ Dst, DstPtrInfo, Align);
+}
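+
+// For example, ByteVal = 0xAB with Size = 2 builds StoreVal = 0xABAB and
+// emits a single halfword store (expected to become MVHHI), while
+// ByteVal = 0xFF with Size = 8 replicates to all ones and maps to MVGHI.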
+
+SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemset(
+ SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst,
+ SDValue Byte, SDValue Size, unsigned Align, bool IsVolatile,
+ MachinePointerInfo DstPtrInfo) const {
+ EVT PtrVT = Dst.getValueType();
+
+ if (IsVolatile)
+ return SDValue();
+
+ if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) {
+ uint64_t Bytes = CSize->getZExtValue();
+ if (Bytes == 0)
+ return SDValue();
+ if (auto *CByte = dyn_cast<ConstantSDNode>(Byte)) {
+ // Handle cases that can be done using at most two of
+ // MVI, MVHI, MVHHI and MVGHI. The latter two can only be
+      // used if ByteVal is all zeros or all ones; in other cases,
+ // we can move at most 2 halfwords.
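+      // For example (illustrative): Bytes = 12 with ByteVal = 0 passes
+      // the test (12 <= 16, two bits set), giving Size1 = 8 and
+      // Size2 = 4, i.e. an MVGHI followed by an MVHI at offset 8;
+      // Bytes = 3 with an arbitrary byte gives Size1 = 2 and Size2 = 1
+      // (MVHHI followed by MVI).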
+ uint64_t ByteVal = CByte->getZExtValue();
+ if (ByteVal == 0 || ByteVal == 255 ?
+ Bytes <= 16 && countPopulation(Bytes) <= 2 :
+ Bytes <= 4) {
+ unsigned Size1 = Bytes == 16 ? 8 : 1 << findLastSet(Bytes);
+ unsigned Size2 = Bytes - Size1;
+ SDValue Chain1 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size1,
+ Align, DstPtrInfo);
+ if (Size2 == 0)
+ return Chain1;
+ Dst = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,
+ DAG.getConstant(Size1, DL, PtrVT));
+ DstPtrInfo = DstPtrInfo.getWithOffset(Size1);
+ SDValue Chain2 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size2,
+ std::min(Align, Size1), DstPtrInfo);
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2);
+ }
+ } else {
+ // Handle one and two bytes using STC.
+ if (Bytes <= 2) {
+ SDValue Chain1 = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, Align);
+ if (Bytes == 1)
+ return Chain1;
+ SDValue Dst2 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,
+ DAG.getConstant(1, DL, PtrVT));
+ SDValue Chain2 =
+ DAG.getStore(Chain, DL, Byte, Dst2, DstPtrInfo.getWithOffset(1),
+ /* Alignment = */ 1);
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2);
+ }
+ }
+ assert(Bytes >= 2 && "Should have dealt with 0- and 1-byte cases already");
+
+ // Handle the special case of a memset of 0, which can use XC.
+ auto *CByte = dyn_cast<ConstantSDNode>(Byte);
+ if (CByte && CByte->getZExtValue() == 0)
+ return emitMemMem(DAG, DL, SystemZISD::XC, SystemZISD::XC_LOOP,
+ Chain, Dst, Dst, Bytes);
+
+ // Copy the byte to the first location and then use MVC to copy
+ // it to the rest.
+ Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, Align);
+ SDValue DstPlus1 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,
+ DAG.getConstant(1, DL, PtrVT));
+ return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP,
+ Chain, DstPlus1, Dst, Bytes - 1);
+ }
+ return SDValue();
+}
+
+// Use CLC to compare [Src1, Src1 + Size) with [Src2, Src2 + Size),
+// deciding whether to use a loop or straight-line code.
+static SDValue emitCLC(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
+ SDValue Src1, SDValue Src2, uint64_t Size) {
+ SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ EVT PtrVT = Src1.getValueType();
+ // A two-CLC sequence is a clear win over a loop, not least because it
+ // needs only one branch. A three-CLC sequence needs the same number
+ // of branches as a loop (i.e. 2), but is shorter. That brings us to
+ // lengths greater than 768 bytes. It seems relatively likely that
+ // a difference will be found within the first 768 bytes, so we just
+ // optimize for the smallest number of branch instructions, in order
+ // to avoid polluting the prediction buffer too much. A loop only ever
+ // needs 2 branches, whereas a straight-line sequence would need 3 or more.
+ if (Size > 3 * 256)
+ return DAG.getNode(SystemZISD::CLC_LOOP, DL, VTs, Chain, Src1, Src2,
+ DAG.getConstant(Size, DL, PtrVT),
+ DAG.getConstant(Size / 256, DL, PtrVT));
+ return DAG.getNode(SystemZISD::CLC, DL, VTs, Chain, Src1, Src2,
+ DAG.getConstant(Size, DL, PtrVT));
+}
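+
+// For example (figures illustrative): a 512-byte compare expands to two
+// chained CLCs with a single branch between them, while a 1024-byte
+// compare exceeds 3 * 256 and uses CLC_LOOP with Size / 256 = 4
+// iterations.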
+
+// Convert the current CC value into an integer that is 0 if CC == 0,
+// less than zero if CC == 1 and greater than zero if CC >= 2.
+// The sequence starts with IPM, which puts CC into bits 29 and 28
+// of an integer and clears bits 30 and 31.
+static SDValue addIPMSequence(const SDLoc &DL, SDValue Glue,
+ SelectionDAG &DAG) {
+ SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
+ SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
+ DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
+ SDValue ROTL = DAG.getNode(ISD::ROTL, DL, MVT::i32, SRL,
+ DAG.getConstant(31, DL, MVT::i32));
+ return ROTL;
+}
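+
+// Worked through with IPM_CC == 28: after the SRL, CC sits in bits 1:0,
+// and the ROTL by 31 is a rotate right by one bit, so CC == 0 maps to 0,
+// CC == 1 maps to 0x80000000 (negative) and CC == 2 maps to 1 (positive),
+// which is exactly the memcmp-style result described above.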
+
+std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemcmp(
+ SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src1,
+ SDValue Src2, SDValue Size, MachinePointerInfo Op1PtrInfo,
+ MachinePointerInfo Op2PtrInfo) const {
+ if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) {
+ uint64_t Bytes = CSize->getZExtValue();
+ assert(Bytes > 0 && "Caller should have handled 0-size case");
+ Chain = emitCLC(DAG, DL, Chain, Src1, Src2, Bytes);
+ SDValue Glue = Chain.getValue(1);
+ return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain);
+ }
+ return std::make_pair(SDValue(), SDValue());
+}
+
+std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemchr(
+ SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src,
+ SDValue Char, SDValue Length, MachinePointerInfo SrcPtrInfo) const {
+ // Use SRST to find the character. End is its address on success.
+ EVT PtrVT = Src.getValueType();
+ SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other, MVT::Glue);
+ Length = DAG.getZExtOrTrunc(Length, DL, PtrVT);
+ Char = DAG.getZExtOrTrunc(Char, DL, MVT::i32);
+ Char = DAG.getNode(ISD::AND, DL, MVT::i32, Char,
+ DAG.getConstant(255, DL, MVT::i32));
+ SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, Length);
+ SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain,
+ Limit, Src, Char);
+ Chain = End.getValue(1);
+ SDValue Glue = End.getValue(2);
+
+ // Now select between End and null, depending on whether the character
+ // was found.
+ SDValue Ops[] = {End, DAG.getConstant(0, DL, PtrVT),
+ DAG.getConstant(SystemZ::CCMASK_SRST, DL, MVT::i32),
+ DAG.getConstant(SystemZ::CCMASK_SRST_FOUND, DL, MVT::i32),
+ Glue};
+ VTs = DAG.getVTList(PtrVT, MVT::Glue);
+ End = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, Ops);
+ return std::make_pair(End, Chain);
+}
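+
+// For example, memchr(Src, 'x', 16) scans [Src, Src + 16) with SRST; if
+// 'x' first occurs at Src + 5, SRST stops there with a "found" CC and the
+// SELECT_CCMASK yields Src + 5, otherwise it yields the null constant.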
+
+std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForStrcpy(
+ SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dest,
+ SDValue Src, MachinePointerInfo DestPtrInfo, MachinePointerInfo SrcPtrInfo,
+ bool isStpcpy) const {
+ SDVTList VTs = DAG.getVTList(Dest.getValueType(), MVT::Other);
+ SDValue EndDest = DAG.getNode(SystemZISD::STPCPY, DL, VTs, Chain, Dest, Src,
+ DAG.getConstant(0, DL, MVT::i32));
+ return std::make_pair(isStpcpy ? EndDest : Dest, EndDest.getValue(1));
+}
+
+std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForStrcmp(
+ SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src1,
+ SDValue Src2, MachinePointerInfo Op1PtrInfo,
+ MachinePointerInfo Op2PtrInfo) const {
+ SDVTList VTs = DAG.getVTList(Src1.getValueType(), MVT::Other, MVT::Glue);
+ SDValue Unused = DAG.getNode(SystemZISD::STRCMP, DL, VTs, Chain, Src1, Src2,
+ DAG.getConstant(0, DL, MVT::i32));
+ Chain = Unused.getValue(1);
+ SDValue Glue = Chain.getValue(2);
+ return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain);
+}
+
+// Search from Src for a null character, stopping once Src reaches Limit.
+// Return a pair of values, the first being the number of nonnull characters
+// and the second being the out chain.
+//
+// This can be used for strlen by setting Limit to 0.
+static std::pair<SDValue, SDValue> getBoundedStrlen(SelectionDAG &DAG,
+ const SDLoc &DL,
+ SDValue Chain, SDValue Src,
+ SDValue Limit) {
+ EVT PtrVT = Src.getValueType();
+ SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other, MVT::Glue);
+ SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain,
+ Limit, Src, DAG.getConstant(0, DL, MVT::i32));
+ Chain = End.getValue(1);
+ SDValue Len = DAG.getNode(ISD::SUB, DL, PtrVT, End, Src);
+ return std::make_pair(Len, Chain);
+}
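+
+// For example, strnlen(Src, 10) passes Limit = Src + 10; if no null byte
+// occurs in that range, SRST stops with End == Limit and the subtraction
+// yields 10, matching the libc contract.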
+
+std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForStrlen(
+ SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src,
+ MachinePointerInfo SrcPtrInfo) const {
+ EVT PtrVT = Src.getValueType();
+ return getBoundedStrlen(DAG, DL, Chain, Src, DAG.getConstant(0, DL, PtrVT));
+}
+
+std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForStrnlen(
+ SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src,
+ SDValue MaxLength, MachinePointerInfo SrcPtrInfo) const {
+ EVT PtrVT = Src.getValueType();
+ MaxLength = DAG.getZExtOrTrunc(MaxLength, DL, PtrVT);
+ SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, MaxLength);
+ return getBoundedStrlen(DAG, DL, Chain, Src, Limit);
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
new file mode 100644
index 000000000000..93cd970c30c6
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
@@ -0,0 +1,74 @@
+//===-- SystemZSelectionDAGInfo.h - SystemZ SelectionDAG Info ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the SystemZ subclass for SelectionDAGTargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZSELECTIONDAGINFO_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZSELECTIONDAGINFO_H
+
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+
+namespace llvm {
+
+class SystemZTargetMachine;
+
+class SystemZSelectionDAGInfo : public SelectionDAGTargetInfo {
+public:
+ explicit SystemZSelectionDAGInfo() = default;
+
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &DL,
+ SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align, bool IsVolatile,
+ bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const override;
+
+ SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &DL,
+ SDValue Chain, SDValue Dst, SDValue Byte,
+ SDValue Size, unsigned Align, bool IsVolatile,
+ MachinePointerInfo DstPtrInfo) const override;
+
+ std::pair<SDValue, SDValue>
+ EmitTargetCodeForMemcmp(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
+ SDValue Src1, SDValue Src2, SDValue Size,
+ MachinePointerInfo Op1PtrInfo,
+ MachinePointerInfo Op2PtrInfo) const override;
+
+ std::pair<SDValue, SDValue>
+ EmitTargetCodeForMemchr(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
+ SDValue Src, SDValue Char, SDValue Length,
+ MachinePointerInfo SrcPtrInfo) const override;
+
+ std::pair<SDValue, SDValue> EmitTargetCodeForStrcpy(
+ SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dest,
+ SDValue Src, MachinePointerInfo DestPtrInfo,
+ MachinePointerInfo SrcPtrInfo, bool isStpcpy) const override;
+
+ std::pair<SDValue, SDValue>
+ EmitTargetCodeForStrcmp(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
+ SDValue Src1, SDValue Src2,
+ MachinePointerInfo Op1PtrInfo,
+ MachinePointerInfo Op2PtrInfo) const override;
+
+ std::pair<SDValue, SDValue>
+ EmitTargetCodeForStrlen(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
+ SDValue Src,
+ MachinePointerInfo SrcPtrInfo) const override;
+
+ std::pair<SDValue, SDValue>
+ EmitTargetCodeForStrnlen(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
+ SDValue Src, SDValue MaxLength,
+ MachinePointerInfo SrcPtrInfo) const override;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
new file mode 100644
index 000000000000..7391df8342ef
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
@@ -0,0 +1,285 @@
+//===-- SystemZShortenInst.cpp - Instruction-shortening pass --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass tries to replace instructions with shorter forms. For example,
+// IILF can be replaced with LLILL or LLILH if the constant fits and if the
+// other 32 bits of the GR64 destination are not live.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZTargetMachine.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "systemz-shorten-inst"
+
+namespace {
+class SystemZShortenInst : public MachineFunctionPass {
+public:
+ static char ID;
+ SystemZShortenInst(const SystemZTargetMachine &tm);
+
+ StringRef getPassName() const override {
+ return "SystemZ Instruction Shortening";
+ }
+
+ bool processBlock(MachineBasicBlock &MBB);
+ bool runOnMachineFunction(MachineFunction &F) override;
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+private:
+ bool shortenIIF(MachineInstr &MI, unsigned LLIxL, unsigned LLIxH);
+ bool shortenOn0(MachineInstr &MI, unsigned Opcode);
+ bool shortenOn01(MachineInstr &MI, unsigned Opcode);
+ bool shortenOn001(MachineInstr &MI, unsigned Opcode);
+ bool shortenOn001AddCC(MachineInstr &MI, unsigned Opcode);
+ bool shortenFPConv(MachineInstr &MI, unsigned Opcode);
+
+ const SystemZInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ LivePhysRegs LiveRegs;
+};
+
+char SystemZShortenInst::ID = 0;
+} // end anonymous namespace
+
+FunctionPass *llvm::createSystemZShortenInstPass(SystemZTargetMachine &TM) {
+ return new SystemZShortenInst(TM);
+}
+
+SystemZShortenInst::SystemZShortenInst(const SystemZTargetMachine &tm)
+ : MachineFunctionPass(ID), TII(nullptr) {}
+
+// Tie operands if MI has become a two-address instruction.
+static void tieOpsIfNeeded(MachineInstr &MI) {
+ if (MI.getDesc().getOperandConstraint(0, MCOI::TIED_TO) &&
+ !MI.getOperand(0).isTied())
+ MI.tieOperands(0, 1);
+}
+
+// MI loads one word of a GPR using an IIxF instruction and LLIxL and LLIxH
+// are the halfword immediate loads for the same word. Try to use one of them
+// instead of IIxF.
+bool SystemZShortenInst::shortenIIF(MachineInstr &MI, unsigned LLIxL,
+ unsigned LLIxH) {
+ unsigned Reg = MI.getOperand(0).getReg();
+ // The new opcode will clear the other half of the GR64 reg, so
+ // cancel if that is live.
+ unsigned thisSubRegIdx =
+ (SystemZ::GRH32BitRegClass.contains(Reg) ? SystemZ::subreg_h32
+ : SystemZ::subreg_l32);
+ unsigned otherSubRegIdx =
+ (thisSubRegIdx == SystemZ::subreg_l32 ? SystemZ::subreg_h32
+ : SystemZ::subreg_l32);
+ unsigned GR64BitReg =
+ TRI->getMatchingSuperReg(Reg, thisSubRegIdx, &SystemZ::GR64BitRegClass);
+ unsigned OtherReg = TRI->getSubReg(GR64BitReg, otherSubRegIdx);
+ if (LiveRegs.contains(OtherReg))
+ return false;
+
+ uint64_t Imm = MI.getOperand(1).getImm();
+ if (SystemZ::isImmLL(Imm)) {
+ MI.setDesc(TII->get(LLIxL));
+ MI.getOperand(0).setReg(SystemZMC::getRegAsGR64(Reg));
+ return true;
+ }
+ if (SystemZ::isImmLH(Imm)) {
+ MI.setDesc(TII->get(LLIxH));
+ MI.getOperand(0).setReg(SystemZMC::getRegAsGR64(Reg));
+ MI.getOperand(1).setImm(Imm >> 16);
+ return true;
+ }
+ return false;
+}
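+
+// For example (illustrative): "IILF %r2, 0x0000abcd" can be shortened to
+// "LLILL %r2, 0xabcd", but LLILL also zeroes bits 0-47 of the containing
+// GR64, so the change is only safe when the other 32-bit half is dead --
+// hence the LiveRegs check above.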
+
+// Change MI's opcode to Opcode if register operand 0 has a 4-bit encoding.
+bool SystemZShortenInst::shortenOn0(MachineInstr &MI, unsigned Opcode) {
+ if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16) {
+ MI.setDesc(TII->get(Opcode));
+ return true;
+ }
+ return false;
+}
+
+// Change MI's opcode to Opcode if register operands 0 and 1 have a
+// 4-bit encoding.
+bool SystemZShortenInst::shortenOn01(MachineInstr &MI, unsigned Opcode) {
+ if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 &&
+ SystemZMC::getFirstReg(MI.getOperand(1).getReg()) < 16) {
+ MI.setDesc(TII->get(Opcode));
+ return true;
+ }
+ return false;
+}
+
+// Change MI's opcode to Opcode if register operands 0, 1 and 2 have a
+// 4-bit encoding and if operands 0 and 1 are tied. Also ties op 0
+// with op 1, if MI becomes 2-address.
+bool SystemZShortenInst::shortenOn001(MachineInstr &MI, unsigned Opcode) {
+ if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 &&
+ MI.getOperand(1).getReg() == MI.getOperand(0).getReg() &&
+ SystemZMC::getFirstReg(MI.getOperand(2).getReg()) < 16) {
+ MI.setDesc(TII->get(Opcode));
+ tieOpsIfNeeded(MI);
+ return true;
+ }
+ return false;
+}
+
+// Calls shortenOn001 if CCLive is false. CC def operand is added in
+// case of success.
+bool SystemZShortenInst::shortenOn001AddCC(MachineInstr &MI, unsigned Opcode) {
+ if (!LiveRegs.contains(SystemZ::CC) && shortenOn001(MI, Opcode)) {
+ MachineInstrBuilder(*MI.getParent()->getParent(), &MI)
+ .addReg(SystemZ::CC, RegState::ImplicitDefine | RegState::Dead);
+ return true;
+ }
+ return false;
+}
+
+// MI is a vector-style conversion instruction with the operand order:
+// destination, source, exact-suppress, rounding-mode. If both registers
+// have a 4-bit encoding then change it to Opcode, which has operand order:
+// destination, rounding-mode, source, exact-suppress.
+bool SystemZShortenInst::shortenFPConv(MachineInstr &MI, unsigned Opcode) {
+ if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 &&
+ SystemZMC::getFirstReg(MI.getOperand(1).getReg()) < 16) {
+ MachineOperand Dest(MI.getOperand(0));
+ MachineOperand Src(MI.getOperand(1));
+ MachineOperand Suppress(MI.getOperand(2));
+ MachineOperand Mode(MI.getOperand(3));
+ MI.RemoveOperand(3);
+ MI.RemoveOperand(2);
+ MI.RemoveOperand(1);
+ MI.RemoveOperand(0);
+ MI.setDesc(TII->get(Opcode));
+ MachineInstrBuilder(*MI.getParent()->getParent(), &MI)
+ .add(Dest)
+ .add(Mode)
+ .add(Src)
+ .add(Suppress);
+ return true;
+ }
+ return false;
+}
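+
+// For example (illustrative): "WFIDB %v0, %v1, 0, 5" (destination,
+// source, exact-suppress, rounding-mode) is re-encoded as
+// "FIDBRA %f0, 5, %f1, 0" (destination, rounding-mode, source,
+// exact-suppress) once both registers have 4-bit encodings.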
+
+// Process all instructions in MBB. Return true if something changed.
+bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ // Set up the set of live registers at the end of MBB (live out)
+ LiveRegs.clear();
+ LiveRegs.addLiveOuts(MBB);
+
+ // Iterate backwards through the block looking for instructions to change.
+ for (auto MBBI = MBB.rbegin(), MBBE = MBB.rend(); MBBI != MBBE; ++MBBI) {
+ MachineInstr &MI = *MBBI;
+ switch (MI.getOpcode()) {
+ case SystemZ::IILF:
+ Changed |= shortenIIF(MI, SystemZ::LLILL, SystemZ::LLILH);
+ break;
+
+ case SystemZ::IIHF:
+ Changed |= shortenIIF(MI, SystemZ::LLIHL, SystemZ::LLIHH);
+ break;
+
+ case SystemZ::WFADB:
+ Changed |= shortenOn001AddCC(MI, SystemZ::ADBR);
+ break;
+
+ case SystemZ::WFDDB:
+ Changed |= shortenOn001(MI, SystemZ::DDBR);
+ break;
+
+ case SystemZ::WFIDB:
+ Changed |= shortenFPConv(MI, SystemZ::FIDBRA);
+ break;
+
+ case SystemZ::WLDEB:
+ Changed |= shortenOn01(MI, SystemZ::LDEBR);
+ break;
+
+ case SystemZ::WLEDB:
+ Changed |= shortenFPConv(MI, SystemZ::LEDBRA);
+ break;
+
+ case SystemZ::WFMDB:
+ Changed |= shortenOn001(MI, SystemZ::MDBR);
+ break;
+
+ case SystemZ::WFLCDB:
+ Changed |= shortenOn01(MI, SystemZ::LCDFR);
+ break;
+
+ case SystemZ::WFLNDB:
+ Changed |= shortenOn01(MI, SystemZ::LNDFR);
+ break;
+
+ case SystemZ::WFLPDB:
+ Changed |= shortenOn01(MI, SystemZ::LPDFR);
+ break;
+
+ case SystemZ::WFSQDB:
+ Changed |= shortenOn01(MI, SystemZ::SQDBR);
+ break;
+
+ case SystemZ::WFSDB:
+ Changed |= shortenOn001AddCC(MI, SystemZ::SDBR);
+ break;
+
+ case SystemZ::WFCDB:
+ Changed |= shortenOn01(MI, SystemZ::CDBR);
+ break;
+
+ case SystemZ::VL32:
+ // For z13 we prefer LDE over LE to avoid partial register dependencies.
+ Changed |= shortenOn0(MI, SystemZ::LDE32);
+ break;
+
+ case SystemZ::VST32:
+ Changed |= shortenOn0(MI, SystemZ::STE);
+ break;
+
+ case SystemZ::VL64:
+ Changed |= shortenOn0(MI, SystemZ::LD);
+ break;
+
+ case SystemZ::VST64:
+ Changed |= shortenOn0(MI, SystemZ::STD);
+ break;
+ }
+
+ LiveRegs.stepBackward(MI);
+ }
+
+ return Changed;
+}
+
+bool SystemZShortenInst::runOnMachineFunction(MachineFunction &F) {
+ if (skipFunction(*F.getFunction()))
+ return false;
+
+ const SystemZSubtarget &ST = F.getSubtarget<SystemZSubtarget>();
+ TII = ST.getInstrInfo();
+ TRI = ST.getRegisterInfo();
+ LiveRegs.init(*TRI);
+
+ bool Changed = false;
+ for (auto &MBB : F)
+ Changed |= processBlock(MBB);
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
new file mode 100644
index 000000000000..eb4a0962f7eb
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -0,0 +1,68 @@
+//===-- SystemZSubtarget.cpp - SystemZ subtarget information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZSubtarget.h"
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "llvm/IR/GlobalValue.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "systemz-subtarget"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "SystemZGenSubtargetInfo.inc"
+
+// Pin the vtable to this file.
+void SystemZSubtarget::anchor() {}
+
+SystemZSubtarget &
+SystemZSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
+ std::string CPUName = CPU;
+ if (CPUName.empty())
+ CPUName = "generic";
+ // Parse features string.
+ ParseSubtargetFeatures(CPUName, FS);
+ return *this;
+}
+
+SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU,
+ const std::string &FS,
+ const TargetMachine &TM)
+ : SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false),
+ HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false),
+ HasPopulationCount(false), HasMessageSecurityAssist3(false),
+ HasMessageSecurityAssist4(false), HasResetReferenceBitsMultiple(false),
+ HasFastSerialization(false), HasInterlockedAccess1(false),
+ HasMiscellaneousExtensions(false),
+ HasExecutionHint(false), HasLoadAndTrap(false),
+ HasTransactionalExecution(false), HasProcessorAssist(false),
+ HasDFPZonedConversion(false), HasEnhancedDAT2(false),
+ HasVector(false), HasLoadStoreOnCond2(false),
+ HasLoadAndZeroRightmostByte(false), HasMessageSecurityAssist5(false),
+ HasDFPPackedConversion(false),
+ TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
+ TLInfo(TM, *this), TSInfo(), FrameLowering() {}
+
+bool SystemZSubtarget::isPC32DBLSymbol(const GlobalValue *GV,
+ CodeModel::Model CM) const {
+ // PC32DBL accesses require the low bit to be clear. Note that a zero
+ // value selects the default alignment and is therefore OK.
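+  // For example, a global declared with an explicit alignment of 1 may
+  // land at an odd address, which LARL's PC32DBL relocation cannot
+  // encode; callers are then expected to fall back to a GOT-based
+  // access.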
+ if (GV->getAlignment() == 1)
+ return false;
+
+ // For the small model, all locally-binding symbols are in range.
+ if (CM == CodeModel::Small)
+ return TLInfo.getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
+
+ // For Medium and above, assume that the symbol is not within the 4GB range.
+ // Taking the address of locally-defined text would be OK, but that
+ // case isn't easy to detect.
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h
new file mode 100644
index 000000000000..b05a1bb6cafd
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h
@@ -0,0 +1,179 @@
+//===-- SystemZSubtarget.h - SystemZ subtarget information -----*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SystemZ specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZSUBTARGET_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZSUBTARGET_H
+
+#include "SystemZFrameLowering.h"
+#include "SystemZISelLowering.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZRegisterInfo.h"
+#include "SystemZSelectionDAGInfo.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "SystemZGenSubtargetInfo.inc"
+
+namespace llvm {
+class GlobalValue;
+class StringRef;
+
+class SystemZSubtarget : public SystemZGenSubtargetInfo {
+ virtual void anchor();
+protected:
+ bool HasDistinctOps;
+ bool HasLoadStoreOnCond;
+ bool HasHighWord;
+ bool HasFPExtension;
+ bool HasPopulationCount;
+ bool HasMessageSecurityAssist3;
+ bool HasMessageSecurityAssist4;
+ bool HasResetReferenceBitsMultiple;
+ bool HasFastSerialization;
+ bool HasInterlockedAccess1;
+ bool HasMiscellaneousExtensions;
+ bool HasExecutionHint;
+ bool HasLoadAndTrap;
+ bool HasTransactionalExecution;
+ bool HasProcessorAssist;
+ bool HasDFPZonedConversion;
+ bool HasEnhancedDAT2;
+ bool HasVector;
+ bool HasLoadStoreOnCond2;
+ bool HasLoadAndZeroRightmostByte;
+ bool HasMessageSecurityAssist5;
+ bool HasDFPPackedConversion;
+
+private:
+ Triple TargetTriple;
+ SystemZInstrInfo InstrInfo;
+ SystemZTargetLowering TLInfo;
+ SystemZSelectionDAGInfo TSInfo;
+ SystemZFrameLowering FrameLowering;
+
+ SystemZSubtarget &initializeSubtargetDependencies(StringRef CPU,
+ StringRef FS);
+public:
+ SystemZSubtarget(const Triple &TT, const std::string &CPU,
+ const std::string &FS, const TargetMachine &TM);
+
+ const TargetFrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
+ const SystemZInstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const SystemZRegisterInfo *getRegisterInfo() const override {
+ return &InstrInfo.getRegisterInfo();
+ }
+ const SystemZTargetLowering *getTargetLowering() const override {
+ return &TLInfo;
+ }
+ const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
+
+ // This is important for reducing register pressure in vector code.
+ bool useAA() const override { return true; }
+
+ // Always enable the early if-conversion pass.
+ bool enableEarlyIfConversion() const override { return true; }
+
+ // Automatically generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ // Return true if the target has the distinct-operands facility.
+ bool hasDistinctOps() const { return HasDistinctOps; }
+
+ // Return true if the target has the load/store-on-condition facility.
+ bool hasLoadStoreOnCond() const { return HasLoadStoreOnCond; }
+
+ // Return true if the target has the load/store-on-condition facility 2.
+ bool hasLoadStoreOnCond2() const { return HasLoadStoreOnCond2; }
+
+ // Return true if the target has the high-word facility.
+ bool hasHighWord() const { return HasHighWord; }
+
+ // Return true if the target has the floating-point extension facility.
+ bool hasFPExtension() const { return HasFPExtension; }
+
+ // Return true if the target has the population-count facility.
+ bool hasPopulationCount() const { return HasPopulationCount; }
+
+ // Return true if the target has the message-security-assist
+ // extension facility 3.
+ bool hasMessageSecurityAssist3() const { return HasMessageSecurityAssist3; }
+
+ // Return true if the target has the message-security-assist
+ // extension facility 4.
+ bool hasMessageSecurityAssist4() const { return HasMessageSecurityAssist4; }
+
+ // Return true if the target has the reset-reference-bits-multiple facility.
+ bool hasResetReferenceBitsMultiple() const {
+ return HasResetReferenceBitsMultiple;
+ }
+
+ // Return true if the target has the fast-serialization facility.
+ bool hasFastSerialization() const { return HasFastSerialization; }
+
+ // Return true if the target has interlocked-access facility 1.
+ bool hasInterlockedAccess1() const { return HasInterlockedAccess1; }
+
+ // Return true if the target has the miscellaneous-extensions facility.
+ bool hasMiscellaneousExtensions() const {
+ return HasMiscellaneousExtensions;
+ }
+
+ // Return true if the target has the execution-hint facility.
+ bool hasExecutionHint() const { return HasExecutionHint; }
+
+ // Return true if the target has the load-and-trap facility.
+ bool hasLoadAndTrap() const { return HasLoadAndTrap; }
+
+ // Return true if the target has the transactional-execution facility.
+ bool hasTransactionalExecution() const { return HasTransactionalExecution; }
+
+ // Return true if the target has the processor-assist facility.
+ bool hasProcessorAssist() const { return HasProcessorAssist; }
+
+ // Return true if the target has the DFP zoned-conversion facility.
+ bool hasDFPZonedConversion() const { return HasDFPZonedConversion; }
+
+ // Return true if the target has the enhanced-DAT facility 2.
+ bool hasEnhancedDAT2() const { return HasEnhancedDAT2; }
+
+ // Return true if the target has the load-and-zero-rightmost-byte facility.
+ bool hasLoadAndZeroRightmostByte() const {
+ return HasLoadAndZeroRightmostByte;
+ }
+
+ // Return true if the target has the message-security-assist
+ // extension facility 5.
+ bool hasMessageSecurityAssist5() const { return HasMessageSecurityAssist5; }
+
+ // Return true if the target has the DFP packed-conversion facility.
+ bool hasDFPPackedConversion() const { return HasDFPPackedConversion; }
+
+ // Return true if the target has the vector facility.
+ bool hasVector() const { return HasVector; }
+
+ // Return true if GV can be accessed using LARL for reloc model RM
+ // and code model CM.
+ bool isPC32DBLSymbol(const GlobalValue *GV, CodeModel::Model CM) const;
+
+ bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTDC.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTDC.cpp
new file mode 100644
index 000000000000..5dbd23d420a3
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTDC.cpp
@@ -0,0 +1,382 @@
+//===-- SystemZTDC.cpp - Utilize Test Data Class instruction --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass looks for instructions that can be replaced by a Test Data Class
+// instruction, and replaces them when profitable.
+//
+// Roughly, the following rules are recognized:
+//
+// 1: fcmp pred X, 0 -> tdc X, mask
+// 2: fcmp pred X, +-inf -> tdc X, mask
+// 3: fcmp pred X, +-minnorm -> tdc X, mask
+// 4: tdc (fabs X), mask -> tdc X, newmask
+// 5: icmp slt (bitcast float X to int), 0 -> tdc X, mask [ie. signbit]
+// 6: icmp sgt (bitcast float X to int), -1 -> tdc X, mask
+// 7: icmp ne/eq (call @llvm.s390.tdc.*(X, mask)) -> tdc X, mask/~mask
+// 8: and i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 & M2)
+// 9: or i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 | M2)
+// 10: xor i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 ^ M2)
+//
+// The pass works in 4 steps:
+//
+// 1. All fcmp and icmp instructions in a function are checked for a match
+// with rules 1-3 and 5-7. Their TDC equivalents are stored in
+// the ConvertedInsts mapping. If the operand of a fcmp instruction is
+// a fabs, it's also folded according to rule 4.
+// 2. All and/or/xor i1 instructions whose both operands have been already
+// mapped are mapped according to rules 8-10. LogicOpsWorklist is used
+// as a queue of instructions to check.
+// 3. All mapped instructions that are considered worthy of conversion (ie.
+// replacing them will actually simplify the final code) are replaced
+// with a call to the s390.tdc intrinsic.
+// 4. All intermediate results of replaced instructions are removed if unused.
+//
+// Instructions that match rules 1-3 are considered unworthy of conversion
+// on their own (since a comparison instruction is superior), but are mapped
+// in the hopes of folding the result using rules 4 and 8-10 (likely removing
+// the original comparison in the process).
+//
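+// For example (rules 2 and 4), IR such as
+//
+//   %a = call double @llvm.fabs.f64(double %x)
+//   %c = fcmp oeq double %a, 0x7FF0000000000000
+//
+// tests fabs(%x) == +inf and becomes a single tdc of %x with a mask
+// covering both TDCMASK_INFINITY_PLUS and TDCMASK_INFINITY_MINUS.
+//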
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include <deque>
+#include <set>
+
+using namespace llvm;
+
+namespace llvm {
+ void initializeSystemZTDCPassPass(PassRegistry&);
+}
+
+namespace {
+
+class SystemZTDCPass : public FunctionPass {
+public:
+ static char ID;
+ SystemZTDCPass() : FunctionPass(ID) {
+ initializeSystemZTDCPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+private:
+ // Maps seen instructions that can be mapped to a TDC, values are
+ // (TDC operand, TDC mask, worthy flag) triples.
+ MapVector<Instruction *, std::tuple<Value *, int, bool>> ConvertedInsts;
+ // The queue of and/or/xor i1 instructions to be potentially folded.
+ std::vector<BinaryOperator *> LogicOpsWorklist;
+ // Instructions matched while folding, to be removed at the end if unused.
+ std::set<Instruction *> PossibleJunk;
+
+ // Tries to convert a fcmp instruction.
+ void convertFCmp(CmpInst &I);
+
+ // Tries to convert an icmp instruction.
+ void convertICmp(CmpInst &I);
+
+ // Tries to convert an i1 and/or/xor instruction, whose both operands
+ // have been already converted.
+ void convertLogicOp(BinaryOperator &I);
+
+ // Marks an instruction as converted - adds it to ConvertedInsts and adds
+ // any and/or/xor i1 users to the queue.
+ void converted(Instruction *I, Value *V, int Mask, bool Worthy) {
+ ConvertedInsts[I] = std::make_tuple(V, Mask, Worthy);
+ auto &M = *I->getFunction()->getParent();
+ auto &Ctx = M.getContext();
+ for (auto *U : I->users()) {
+ auto *LI = dyn_cast<BinaryOperator>(U);
+ if (LI && LI->getType() == Type::getInt1Ty(Ctx) &&
+ (LI->getOpcode() == Instruction::And ||
+ LI->getOpcode() == Instruction::Or ||
+ LI->getOpcode() == Instruction::Xor)) {
+ LogicOpsWorklist.push_back(LI);
+ }
+ }
+ }
+};
+
+} // end anonymous namespace
+
+char SystemZTDCPass::ID = 0;
+INITIALIZE_PASS(SystemZTDCPass, "systemz-tdc",
+ "SystemZ Test Data Class optimization", false, false)
+
+FunctionPass *llvm::createSystemZTDCPass() {
+ return new SystemZTDCPass();
+}
+
+void SystemZTDCPass::convertFCmp(CmpInst &I) {
+ Value *Op0 = I.getOperand(0);
+ auto *Const = dyn_cast<ConstantFP>(I.getOperand(1));
+ auto Pred = I.getPredicate();
+ // Only comparisons with consts are interesting.
+ if (!Const)
+ return;
+ // Compute the smallest normal number (and its negation).
+ auto &Sem = Op0->getType()->getFltSemantics();
+ APFloat Smallest = APFloat::getSmallestNormalized(Sem);
+ APFloat NegSmallest = Smallest;
+ NegSmallest.changeSign();
+ // Check if Const is one of our recognized consts.
+ int WhichConst;
+ if (Const->isZero()) {
+ // All comparisons with 0 can be converted.
+ WhichConst = 0;
+ } else if (Const->isInfinity()) {
+ // Likewise for infinities.
+ WhichConst = Const->isNegative() ? 2 : 1;
+ } else if (Const->isExactlyValue(Smallest)) {
+ // For Smallest, we cannot do EQ separately from GT.
+ if ((Pred & CmpInst::FCMP_OGE) != CmpInst::FCMP_OGE &&
+ (Pred & CmpInst::FCMP_OGE) != 0)
+ return;
+ WhichConst = 3;
+ } else if (Const->isExactlyValue(NegSmallest)) {
+ // Likewise for NegSmallest, we cannot do EQ separately from LT.
+ if ((Pred & CmpInst::FCMP_OLE) != CmpInst::FCMP_OLE &&
+ (Pred & CmpInst::FCMP_OLE) != 0)
+ return;
+ WhichConst = 4;
+ } else {
+ // Not one of our special constants.
+ return;
+ }
+ // Partial masks to use for EQ, GT, LT, UN comparisons, respectively.
+ static const int Masks[][4] = {
+ { // 0
+ SystemZ::TDCMASK_ZERO, // eq
+ SystemZ::TDCMASK_POSITIVE, // gt
+ SystemZ::TDCMASK_NEGATIVE, // lt
+ SystemZ::TDCMASK_NAN, // un
+ },
+ { // inf
+ SystemZ::TDCMASK_INFINITY_PLUS, // eq
+ 0, // gt
+ (SystemZ::TDCMASK_ZERO |
+ SystemZ::TDCMASK_NEGATIVE |
+ SystemZ::TDCMASK_NORMAL_PLUS |
+ SystemZ::TDCMASK_SUBNORMAL_PLUS), // lt
+ SystemZ::TDCMASK_NAN, // un
+ },
+ { // -inf
+ SystemZ::TDCMASK_INFINITY_MINUS, // eq
+ (SystemZ::TDCMASK_ZERO |
+ SystemZ::TDCMASK_POSITIVE |
+ SystemZ::TDCMASK_NORMAL_MINUS |
+ SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt
+ 0, // lt
+ SystemZ::TDCMASK_NAN, // un
+ },
+ { // minnorm
+ 0, // eq (unsupported)
+ (SystemZ::TDCMASK_NORMAL_PLUS |
+ SystemZ::TDCMASK_INFINITY_PLUS), // gt (actually ge)
+ (SystemZ::TDCMASK_ZERO |
+ SystemZ::TDCMASK_NEGATIVE |
+ SystemZ::TDCMASK_SUBNORMAL_PLUS), // lt
+ SystemZ::TDCMASK_NAN, // un
+ },
+ { // -minnorm
+ 0, // eq (unsupported)
+ (SystemZ::TDCMASK_ZERO |
+ SystemZ::TDCMASK_POSITIVE |
+ SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt
+ (SystemZ::TDCMASK_NORMAL_MINUS |
+ SystemZ::TDCMASK_INFINITY_MINUS), // lt (actually le)
+ SystemZ::TDCMASK_NAN, // un
+ }
+ };
+ // Construct the mask as a combination of the partial masks.
+ int Mask = 0;
+ if (Pred & CmpInst::FCMP_OEQ)
+ Mask |= Masks[WhichConst][0];
+ if (Pred & CmpInst::FCMP_OGT)
+ Mask |= Masks[WhichConst][1];
+ if (Pred & CmpInst::FCMP_OLT)
+ Mask |= Masks[WhichConst][2];
+ if (Pred & CmpInst::FCMP_UNO)
+ Mask |= Masks[WhichConst][3];
+ // A lone fcmp is unworthy of tdc conversion on its own, but may become
+ // worthy if combined with fabs.
+ bool Worthy = false;
+ if (CallInst *CI = dyn_cast<CallInst>(Op0)) {
+ Function *F = CI->getCalledFunction();
+ if (F && F->getIntrinsicID() == Intrinsic::fabs) {
+ // Fold with fabs - adjust the mask appropriately.
+ Mask &= SystemZ::TDCMASK_PLUS;
+ Mask |= Mask >> 1;
+ Op0 = CI->getArgOperand(0);
+ // A combination of fcmp with fabs is a win, unless the constant
+ // involved is 0 (which is handled by later passes).
+ Worthy = WhichConst != 0;
+ PossibleJunk.insert(CI);
+ }
+ }
+ converted(&I, Op0, Mask, Worthy);
+}
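+
+// For example, "fcmp ult double %x, 0.0" decomposes as OLT | UNO, so the
+// mask becomes TDCMASK_NEGATIVE | TDCMASK_NAN; the mapping is recorded as
+// unworthy on its own and only materializes if later folded with another
+// mapped instruction.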
+
+void SystemZTDCPass::convertICmp(CmpInst &I) {
+ Value *Op0 = I.getOperand(0);
+ auto *Const = dyn_cast<ConstantInt>(I.getOperand(1));
+ auto Pred = I.getPredicate();
+ // All our icmp rules involve comparisons with consts.
+ if (!Const)
+ return;
+ if (auto *Cast = dyn_cast<BitCastInst>(Op0)) {
+ // Check for icmp+bitcast used for signbit.
+ if (!Cast->getSrcTy()->isFloatTy() &&
+ !Cast->getSrcTy()->isDoubleTy() &&
+ !Cast->getSrcTy()->isFP128Ty())
+ return;
+ Value *V = Cast->getOperand(0);
+ int Mask;
+ if (Pred == CmpInst::ICMP_SLT && Const->isZero()) {
+ // icmp slt (bitcast X), 0 - set if sign bit true
+ Mask = SystemZ::TDCMASK_MINUS;
+ } else if (Pred == CmpInst::ICMP_SGT && Const->isMinusOne()) {
+ // icmp sgt (bitcast X), -1 - set if sign bit false
+ Mask = SystemZ::TDCMASK_PLUS;
+ } else {
+ // Not a sign bit check.
+ return;
+ }
+ PossibleJunk.insert(Cast);
+ converted(&I, V, Mask, true);
+ } else if (auto *CI = dyn_cast<CallInst>(Op0)) {
+ // Check if this is a pre-existing call of our tdc intrinsic.
+ Function *F = CI->getCalledFunction();
+ if (!F || F->getIntrinsicID() != Intrinsic::s390_tdc)
+ return;
+ if (!Const->isZero())
+ return;
+ Value *V = CI->getArgOperand(0);
+ auto *MaskC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ // Bail if the mask is not a constant.
+ if (!MaskC)
+ return;
+ int Mask = MaskC->getZExtValue();
+ Mask &= SystemZ::TDCMASK_ALL;
+ if (Pred == CmpInst::ICMP_NE) {
+ // icmp ne (call llvm.s390.tdc(...)), 0 -> simple TDC
+ } else if (Pred == CmpInst::ICMP_EQ) {
+ // icmp eq (call llvm.s390.tdc(...)), 0 -> TDC with inverted mask
+ Mask ^= SystemZ::TDCMASK_ALL;
+ } else {
+ // An unknown comparison - ignore.
+ return;
+ }
+ PossibleJunk.insert(CI);
+ converted(&I, V, Mask, false);
+ }
+}
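+
+// For example, the common sign-bit idiom
+//
+//   %b = bitcast double %x to i64
+//   %c = icmp slt i64 %b, 0
+//
+// maps to a tdc of %x with TDCMASK_MINUS, marked worthy immediately since
+// it avoids materializing the bitcast in a GPR.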
+
+void SystemZTDCPass::convertLogicOp(BinaryOperator &I) {
+ Value *Op0, *Op1;
+ int Mask0, Mask1;
+ bool Worthy0, Worthy1;
+ std::tie(Op0, Mask0, Worthy0) = ConvertedInsts[cast<Instruction>(I.getOperand(0))];
+ std::tie(Op1, Mask1, Worthy1) = ConvertedInsts[cast<Instruction>(I.getOperand(1))];
+ if (Op0 != Op1)
+ return;
+ int Mask;
+ switch (I.getOpcode()) {
+ case Instruction::And:
+ Mask = Mask0 & Mask1;
+ break;
+ case Instruction::Or:
+ Mask = Mask0 | Mask1;
+ break;
+ case Instruction::Xor:
+ Mask = Mask0 ^ Mask1;
+ break;
+ default:
+ llvm_unreachable("Unknown op in convertLogicOp");
+ }
+ converted(&I, Op0, Mask, true);
+}
+
+bool SystemZTDCPass::runOnFunction(Function &F) {
+ ConvertedInsts.clear();
+ LogicOpsWorklist.clear();
+ PossibleJunk.clear();
+
+ // Look for icmp+fcmp instructions.
+ for (auto &I : instructions(F)) {
+ if (I.getOpcode() == Instruction::FCmp)
+ convertFCmp(cast<CmpInst>(I));
+ else if (I.getOpcode() == Instruction::ICmp)
+ convertICmp(cast<CmpInst>(I));
+ }
+
+ // If none found, bail already.
+ if (ConvertedInsts.empty())
+ return false;
+
+ // Process the queue of logic instructions.
+ while (!LogicOpsWorklist.empty()) {
+ BinaryOperator *Op = LogicOpsWorklist.back();
+ LogicOpsWorklist.pop_back();
+ // If both operands mapped, and the instruction itself not yet mapped,
+ // convert it.
+ if (ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(0))) &&
+ ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(1))) &&
+ !ConvertedInsts.count(Op))
+ convertLogicOp(*Op);
+ }
+
+ // Time to actually replace the instructions. Do it in the reverse order
+ // of finding them, since there's a good chance the earlier ones will be
+ // unused (due to being folded into later ones).
+ Module &M = *F.getParent();
+ auto &Ctx = M.getContext();
+ Value *Zero32 = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
+ bool MadeChange = false;
+ for (auto &It : reverse(ConvertedInsts)) {
+ Instruction *I = It.first;
+ Value *V;
+ int Mask;
+ bool Worthy;
+ std::tie(V, Mask, Worthy) = It.second;
+ if (!I->user_empty()) {
+ // If used and unworthy of conversion, skip it.
+ if (!Worthy)
+ continue;
+ // Call the intrinsic, compare result with 0.
+ Value *TDCFunc = Intrinsic::getDeclaration(&M, Intrinsic::s390_tdc,
+ V->getType());
+ IRBuilder<> IRB(I);
+ Value *MaskVal = ConstantInt::get(Type::getInt64Ty(Ctx), Mask);
+ Instruction *TDC = IRB.CreateCall(TDCFunc, {V, MaskVal});
+ Value *ICmp = IRB.CreateICmp(CmpInst::ICMP_NE, TDC, Zero32);
+ I->replaceAllUsesWith(ICmp);
+ }
+ // If unused, or used and converted, remove it.
+ I->eraseFromParent();
+ MadeChange = true;
+ }
+
+ if (!MadeChange)
+ return false;
+
+ // We've actually done something - now clear misc accumulated junk (fabs,
+ // bitcast).
+ for (auto *I : PossibleJunk)
+ if (I->user_empty())
+ I->eraseFromParent();
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
new file mode 100644
index 000000000000..cb81c0e5276e
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -0,0 +1,222 @@
+//===-- SystemZTargetMachine.cpp - Define TargetMachine for SystemZ -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZTargetMachine.h"
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "SystemZ.h"
+#include "SystemZMachineScheduler.h"
+#include "SystemZTargetTransformInfo.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Transforms/Scalar.h"
+#include <string>
+
+using namespace llvm;
+
+extern "C" void LLVMInitializeSystemZTarget() {
+ // Register the target.
+ RegisterTargetMachine<SystemZTargetMachine> X(getTheSystemZTarget());
+}
+
+// Determine whether we use the vector ABI.
+static bool UsesVectorABI(StringRef CPU, StringRef FS) {
+  // We use the vector ABI whenever the vector facility is available.
+ // This is the case by default if CPU is z13 or later, and can be
+ // overridden via "[+-]vector" feature string elements.
+ bool VectorABI = true;
+ if (CPU.empty() || CPU == "generic" ||
+ CPU == "z10" || CPU == "z196" || CPU == "zEC12")
+ VectorABI = false;
+
+ SmallVector<StringRef, 3> Features;
+ FS.split(Features, ',', -1, false /* KeepEmpty */);
+ for (auto &Feature : Features) {
+ if (Feature == "vector" || Feature == "+vector")
+ VectorABI = true;
+ if (Feature == "-vector")
+ VectorABI = false;
+ }
+
+ return VectorABI;
+}
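+
+// For example, CPU = "z13" with an empty feature string yields the vector
+// ABI, "z13" with "-vector" and "zEC12" with no features do not, and
+// "zEC12" combined with "+vector" opts back in.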
+
+static std::string computeDataLayout(const Triple &TT, StringRef CPU,
+ StringRef FS) {
+ bool VectorABI = UsesVectorABI(CPU, FS);
+ std::string Ret;
+
+ // Big endian.
+ Ret += "E";
+
+ // Data mangling.
+ Ret += DataLayout::getManglingComponent(TT);
+
+ // Make sure that global data has at least 16 bits of alignment by
+ // default, so that we can refer to it using LARL. We don't have any
+ // special requirements for stack variables though.
+ Ret += "-i1:8:16-i8:8:16";
+
+ // 64-bit integers are naturally aligned.
+ Ret += "-i64:64";
+
+ // 128-bit floats are aligned only to 64 bits.
+ Ret += "-f128:64";
+
+ // When using the vector ABI, 128-bit vectors are also aligned to 64 bits.
+ if (VectorABI)
+ Ret += "-v128:64";
+
+  // We prefer 16 bits of alignment for all globals; see above.
+ Ret += "-a:8:16";
+
+ // Integer registers are 32 or 64 bits.
+ Ret += "-n32:64";
+
+ return Ret;
+}
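+
+// For a z13 ELF target this produces (assuming the usual "m:e" ELF
+// mangling component):
+//
+//   E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64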
+
+static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
+ // Static code is suitable for use in a dynamic executable; there is no
+ // separate DynamicNoPIC model.
+ if (!RM.hasValue() || *RM == Reloc::DynamicNoPIC)
+ return Reloc::Static;
+ return *RM;
+}
+
+SystemZTargetMachine::SystemZTargetMachine(const Target &T, const Triple &TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Optional<Reloc::Model> RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, computeDataLayout(TT, CPU, FS), TT, CPU, FS, Options,
+ getEffectiveRelocModel(RM), CM, OL),
+ TLOF(llvm::make_unique<TargetLoweringObjectFileELF>()),
+ Subtarget(TT, CPU, FS, *this) {
+ initAsmInfo();
+}
+
+SystemZTargetMachine::~SystemZTargetMachine() = default;
+
+namespace {
+
+/// SystemZ Code Generator Pass Configuration Options.
+class SystemZPassConfig : public TargetPassConfig {
+public:
+ SystemZPassConfig(SystemZTargetMachine &TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ SystemZTargetMachine &getSystemZTargetMachine() const {
+ return getTM<SystemZTargetMachine>();
+ }
+
+ ScheduleDAGInstrs *
+ createPostMachineScheduler(MachineSchedContext *C) const override {
+ return new ScheduleDAGMI(C,
+ llvm::make_unique<SystemZPostRASchedStrategy>(C),
+ /*RemoveKillFlags=*/true);
+ }
+
+ void addIRPasses() override;
+ bool addInstSelector() override;
+ bool addILPOpts() override;
+ void addPreSched2() override;
+ void addPreEmitPass() override;
+};
+
+} // end anonymous namespace
+
+void SystemZPassConfig::addIRPasses() {
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createSystemZTDCPass());
+
+ TargetPassConfig::addIRPasses();
+}
+
+bool SystemZPassConfig::addInstSelector() {
+ addPass(createSystemZISelDag(getSystemZTargetMachine(), getOptLevel()));
+
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createSystemZLDCleanupPass(getSystemZTargetMachine()));
+
+ return false;
+}
+
+bool SystemZPassConfig::addILPOpts() {
+ addPass(&EarlyIfConverterID);
+ return true;
+}
+
+void SystemZPassConfig::addPreSched2() {
+ addPass(createSystemZExpandPseudoPass(getSystemZTargetMachine()));
+
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(&IfConverterID);
+}
+
+void SystemZPassConfig::addPreEmitPass() {
+ // Do instruction shortening before compare elimination because some
+ // vector instructions will be shortened into opcodes that compare
+ // elimination recognizes.
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createSystemZShortenInstPass(getSystemZTargetMachine()), false);
+
+ // We eliminate comparisons here rather than earlier because some
+ // transformations can change the set of available CC values and we
+ // generally want those transformations to have priority. This is
+ // especially true in the most common case, where the result of the comparison
+ // is used by a single in-range branch instruction, since we will then
+ // be able to fuse the compare and the branch instead.
+ //
+ // For example, two-address NILF can sometimes be converted into
+ // three-address RISBLG. NILF produces a CC value that indicates whether
+ // the low word is zero, but RISBLG does not modify CC at all. On the
+ // other hand, 64-bit ANDs like NILL can sometimes be converted to RISBG.
+ // The CC value produced by NILL isn't useful for our purposes, but the
+ // value produced by RISBG can be used for any comparison with zero
+ // (not just equality). So there are some transformations that lose
+ // CC values (while still being worthwhile) and others that happen to make
+ // the CC result more useful than it was originally.
+ //
+ // Another reason is that we only want to use BRANCH ON COUNT in cases
+ // where we know that the count register is not going to be spilled.
+ //
+ // Doing it so late makes it more likely that a register will be reused
+ // between the comparison and the branch, but it isn't clear whether
+ // preventing that would be a win or not.
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createSystemZElimComparePass(getSystemZTargetMachine()), false);
+ addPass(createSystemZLongBranchPass(getSystemZTargetMachine()));
+
+ // Do final scheduling after all other optimizations, to get an
+ // optimal input for the decoder (branch relaxation must happen
+ // after block placement).
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(&PostMachineSchedulerID);
+}
+
+TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new SystemZPassConfig(*this, PM);
+}
+
+TargetIRAnalysis SystemZTargetMachine::getTargetIRAnalysis() {
+ return TargetIRAnalysis([this](const Function &F) {
+ return TargetTransformInfo(SystemZTTIImpl(this, F));
+ });
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
new file mode 100644
index 000000000000..a10ca64fa632
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -0,0 +1,58 @@
+//=- SystemZTargetMachine.h - Define TargetMachine for SystemZ ----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SystemZ specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETMACHINE_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETMACHINE_H
+
+#include "SystemZSubtarget.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Target/TargetMachine.h"
+#include <memory>
+
+namespace llvm {
+
+class SystemZTargetMachine : public LLVMTargetMachine {
+ std::unique_ptr<TargetLoweringObjectFile> TLOF;
+ SystemZSubtarget Subtarget;
+
+public:
+ SystemZTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Optional<Reloc::Model> RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+ ~SystemZTargetMachine() override;
+
+ const SystemZSubtarget *getSubtargetImpl() const { return &Subtarget; }
+
+ const SystemZSubtarget *getSubtargetImpl(const Function &) const override {
+ return &Subtarget;
+ }
+
+ // Override LLVMTargetMachine
+ TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+ TargetIRAnalysis getTargetIRAnalysis() override;
+
+ TargetLoweringObjectFile *getObjFileLowering() const override {
+ return TLOF.get();
+ }
+
+ bool targetSchedulesPostRAScheduling() const override { return true; }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETMACHINE_H
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
new file mode 100644
index 000000000000..ce5c57e0f519
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -0,0 +1,888 @@
+//===-- SystemZTargetTransformInfo.cpp - SystemZ-specific TTI -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a TargetTransformInfo analysis pass specific to the
+// SystemZ target machine. It uses the target's detailed information to provide
+// more precise answers to certain TTI queries, while letting the target
+// independent and default TTI implementations handle the rest.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZTargetTransformInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/CostTable.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "systemztti"
+
+//===----------------------------------------------------------------------===//
+//
+// SystemZ cost model.
+//
+//===----------------------------------------------------------------------===//
+
+int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ // There is no cost model for constants with a bit size of 0. Return TCC_Free
+ // here, so that constant hoisting will ignore this constant.
+ if (BitSize == 0)
+ return TTI::TCC_Free;
+ // There is no cost model yet for operations on integers wider than 64 bits.
+ if (BitSize > 64)
+ return TTI::TCC_Free;
+
+ if (Imm == 0)
+ return TTI::TCC_Free;
+
+ if (Imm.getBitWidth() <= 64) {
+ // Constants loaded via lgfi.
+ if (isInt<32>(Imm.getSExtValue()))
+ return TTI::TCC_Basic;
+ // Constants loaded via llilf.
+ if (isUInt<32>(Imm.getZExtValue()))
+ return TTI::TCC_Basic;
+ // Constants loaded via llihf:
+ if ((Imm.getZExtValue() & 0xffffffff) == 0)
+ return TTI::TCC_Basic;
+
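+ // Anything else needs at least two instructions, e.g. a llilf/iihf
+ // pair, hence twice the basic cost.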
+ return 2 * TTI::TCC_Basic;
+ }
+
+ return 4 * TTI::TCC_Basic;
+}
+
+int SystemZTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ // There is no cost model for constants with a bit size of 0. Return TCC_Free
+ // here, so that constant hoisting will ignore this constant.
+ if (BitSize == 0)
+ return TTI::TCC_Free;
+ // There is no cost model yet for operations on integers wider than 64 bits.
+ if (BitSize > 64)
+ return TTI::TCC_Free;
+
+ switch (Opcode) {
+ default:
+ return TTI::TCC_Free;
+ case Instruction::GetElementPtr:
+ // Always hoist the base address of a GetElementPtr. This prevents the
+ // creation of new constants for every base constant that gets constant
+ // folded with the offset.
+ if (Idx == 0)
+ return 2 * TTI::TCC_Basic;
+ return TTI::TCC_Free;
+ case Instruction::Store:
+ if (Idx == 0 && Imm.getBitWidth() <= 64) {
+ // Any 8-bit immediate store can be implemented via mvi.
+ if (BitSize == 8)
+ return TTI::TCC_Free;
+ // 16-bit immediate values can be stored via mvhhi/mvhi/mvghi.
+ if (isInt<16>(Imm.getSExtValue()))
+ return TTI::TCC_Free;
+ }
+ break;
+ case Instruction::ICmp:
+ if (Idx == 1 && Imm.getBitWidth() <= 64) {
+ // Comparisons against signed 32-bit immediates implemented via cgfi.
+ if (isInt<32>(Imm.getSExtValue()))
+ return TTI::TCC_Free;
+ // Comparisons against unsigned 32-bit immediates implemented via clgfi.
+ if (isUInt<32>(Imm.getZExtValue()))
+ return TTI::TCC_Free;
+ }
+ break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ if (Idx == 1 && Imm.getBitWidth() <= 64) {
+ // We use algfi/slgfi to add/subtract 32-bit unsigned immediates.
+ if (isUInt<32>(Imm.getZExtValue()))
+ return TTI::TCC_Free;
+ // Or their negation, by swapping addition vs. subtraction.
+ if (isUInt<32>(-Imm.getSExtValue()))
+ return TTI::TCC_Free;
+ }
+ break;
+ case Instruction::Mul:
+ if (Idx == 1 && Imm.getBitWidth() <= 64) {
+ // We use msgfi to multiply by 32-bit signed immediates.
+ if (isInt<32>(Imm.getSExtValue()))
+ return TTI::TCC_Free;
+ }
+ break;
+ case Instruction::Or:
+ case Instruction::Xor:
+ if (Idx == 1 && Imm.getBitWidth() <= 64) {
+ // Masks supported by oilf/xilf.
+ if (isUInt<32>(Imm.getZExtValue()))
+ return TTI::TCC_Free;
+ // Masks supported by oihf/xihf.
+ if ((Imm.getZExtValue() & 0xffffffff) == 0)
+ return TTI::TCC_Free;
+ }
+ break;
+ case Instruction::And:
+ if (Idx == 1 && Imm.getBitWidth() <= 64) {
+ // Any 32-bit AND operation can be implemented via nilf.
+ if (BitSize <= 32)
+ return TTI::TCC_Free;
+ // 64-bit masks supported by nilf.
+ if (isUInt<32>(~Imm.getZExtValue()))
+ return TTI::TCC_Free;
+ // 64-bit masks supported by nilh.
+ if ((Imm.getZExtValue() & 0xffffffff) == 0xffffffff)
+ return TTI::TCC_Free;
+ // Some 64-bit AND operations can be implemented via risbg.
+ const SystemZInstrInfo *TII = ST->getInstrInfo();
+ unsigned Start, End;
+ if (TII->isRxSBGMask(Imm.getZExtValue(), BitSize, Start, End))
+ return TTI::TCC_Free;
+ }
+ break;
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ // Always return TCC_Free for the shift value of a shift instruction.
+ if (Idx == 1)
+ return TTI::TCC_Free;
+ break;
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::IntToPtr:
+ case Instruction::PtrToInt:
+ case Instruction::BitCast:
+ case Instruction::PHI:
+ case Instruction::Call:
+ case Instruction::Select:
+ case Instruction::Ret:
+ case Instruction::Load:
+ break;
+ }
+
+ return SystemZTTIImpl::getIntImmCost(Imm, Ty);
+}
+
+int SystemZTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ // There is no cost model for constants with a bit size of 0. Return TCC_Free
+ // here, so that constant hoisting will ignore this constant.
+ if (BitSize == 0)
+ return TTI::TCC_Free;
+ // There is no cost model yet for operations on integers wider than 64 bits.
+ if (BitSize > 64)
+ return TTI::TCC_Free;
+
+ switch (IID) {
+ default:
+ return TTI::TCC_Free;
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ // These get expanded to include a normal addition/subtraction.
+ if (Idx == 1 && Imm.getBitWidth() <= 64) {
+ if (isUInt<32>(Imm.getZExtValue()))
+ return TTI::TCC_Free;
+ if (isUInt<32>(-Imm.getSExtValue()))
+ return TTI::TCC_Free;
+ }
+ break;
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ // These get expanded to include a normal multiplication.
+ if (Idx == 1 && Imm.getBitWidth() <= 64) {
+ if (isInt<32>(Imm.getSExtValue()))
+ return TTI::TCC_Free;
+ }
+ break;
+ case Intrinsic::experimental_stackmap:
+ if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
+ return TTI::TCC_Free;
+ break;
+ case Intrinsic::experimental_patchpoint_void:
+ case Intrinsic::experimental_patchpoint_i64:
+ if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
+ return TTI::TCC_Free;
+ break;
+ }
+ return SystemZTTIImpl::getIntImmCost(Imm, Ty);
+}
+
+TargetTransformInfo::PopcntSupportKind
+SystemZTTIImpl::getPopcntSupport(unsigned TyWidth) {
+ assert(isPowerOf2_32(TyWidth) && "Type width must be power of 2");
+ if (ST->hasPopulationCount() && TyWidth <= 64)
+ return TTI::PSK_FastHardware;
+ return TTI::PSK_Software;
+}
+
+void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::UnrollingPreferences &UP) {
+ // Find out if L contains a call, what the machine instruction count
+ // estimate is, and how many stores there are.
+ bool HasCall = false;
+ unsigned NumStores = 0;
+ for (auto &BB : L->blocks())
+ for (auto &I : *BB) {
+ if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) {
+ ImmutableCallSite CS(&I);
+ if (const Function *F = CS.getCalledFunction()) {
+ if (isLoweredToCall(F))
+ HasCall = true;
+ if (F->getIntrinsicID() == Intrinsic::memcpy ||
+ F->getIntrinsicID() == Intrinsic::memset)
+ NumStores++;
+ } else { // indirect call.
+ HasCall = true;
+ }
+ }
+ if (isa<StoreInst>(&I)) {
+ Type *MemAccessTy = I.getOperand(0)->getType();
+ NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, 0, 0);
+ }
+ }
+
+ // The z13 processor will run out of store tags if too many stores
+ // are fed into it too quickly. Therefore make sure there are not
+ // too many stores in the resulting unrolled loop.
+ unsigned const Max = (NumStores ? (12 / NumStores) : UINT_MAX);
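+ // For instance, with a store count of three, Max = 12 / 3 = 4, i.e. at
+ // most a 4x unroll.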
+
+ if (HasCall) {
+ // If the loop contains calls, allow only full unrolling (up to Max);
+ // partial unrolling is disabled by setting MaxCount to 1.
+ UP.FullUnrollMaxCount = Max;
+ UP.MaxCount = 1;
+ return;
+ }
+
+ UP.MaxCount = Max;
+ if (UP.MaxCount <= 1)
+ return;
+
+ // Allow partial and runtime trip count unrolling.
+ UP.Partial = UP.Runtime = true;
+
+ UP.PartialThreshold = 75;
+ UP.DefaultUnrollRuntimeCount = 4;
+
+ // Allow expensive instructions in the pre-header of the loop.
+ UP.AllowExpensiveTripCount = true;
+
+ UP.Force = true;
+}
+
+unsigned SystemZTTIImpl::getNumberOfRegisters(bool Vector) {
+ if (!Vector)
+ // Discount the stack pointer. Also leave out %r0, since it can't
+ // be used in an address.
+ return 14;
+ if (ST->hasVector())
+ return 32;
+ return 0;
+}
+
+unsigned SystemZTTIImpl::getRegisterBitWidth(bool Vector) const {
+ if (!Vector)
+ return 64;
+ if (ST->hasVector())
+ return 128;
+ return 0;
+}
+
+int SystemZTTIImpl::getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty,
+ TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info,
+ TTI::OperandValueProperties Opd1PropInfo,
+ TTI::OperandValueProperties Opd2PropInfo,
+ ArrayRef<const Value *> Args) {
+
+ // TODO: return a good value for BB-VECTORIZER that includes the
+ // immediate loads, which we do not want to count for the loop
+ // vectorizer, since they are hopefully hoisted out of the loop. This
+ // would require a new parameter 'InLoop', but it is not clear whether
+ // constant args are common enough to motivate this.
+
+ unsigned ScalarBits = Ty->getScalarSizeInBits();
+
+ // Div with a constant which is a power of 2 will be converted by
+ // DAGCombiner to use shifts. With vector shift-element instructions, a
+ // vector sdiv costs about as much as a scalar one.
+ const unsigned SDivCostEstimate = 4;
+ bool SDivPow2 = false;
+ bool UDivPow2 = false;
+ if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv) &&
+ Args.size() == 2) {
+ const ConstantInt *CI = nullptr;
+ if (const Constant *C = dyn_cast<Constant>(Args[1])) {
+ if (C->getType()->isVectorTy())
+ CI = dyn_cast_or_null<const ConstantInt>(C->getSplatValue());
+ else
+ CI = dyn_cast<const ConstantInt>(C);
+ }
+ if (CI != nullptr &&
+ (CI->getValue().isPowerOf2() || (-CI->getValue()).isPowerOf2())) {
+ if (Opcode == Instruction::SDiv)
+ SDivPow2 = true;
+ else
+ UDivPow2 = true;
+ }
+ }
+
+ if (Ty->isVectorTy()) {
+ assert (ST->hasVector() && "getArithmeticInstrCost() called with vector type.");
+ unsigned VF = Ty->getVectorNumElements();
+ unsigned NumVectors = getNumberOfParts(Ty);
+
+ // These vector operations are custom handled, but are still supported
+ // with one instruction per vector, regardless of element size.
+ if (Opcode == Instruction::Shl || Opcode == Instruction::LShr ||
+ Opcode == Instruction::AShr || UDivPow2) {
+ return NumVectors;
+ }
+
+ if (SDivPow2)
+ return (NumVectors * SDivCostEstimate);
+
+ // These FP operations are supported with a single vector instruction for
+ // double (base implementation assumes float generally costs 2). For
+ // FP128, the scalar cost is 1, and there is no overhead since the values
+ // are already in scalar registers.
+ if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub ||
+ Opcode == Instruction::FMul || Opcode == Instruction::FDiv) {
+ switch (ScalarBits) {
+ case 32: {
+ // Return the cost of multiple scalar invocations plus the cost of
+ // inserting and extracting the values.
+ unsigned ScalarCost = getArithmeticInstrCost(Opcode, Ty->getScalarType());
+ unsigned Cost = (VF * ScalarCost) + getScalarizationOverhead(Ty, Args);
+ // FIXME: VF 2 for these FP operations is currently just as
+ // expensive as for VF 4.
+ if (VF == 2)
+ Cost *= 2;
+ return Cost;
+ }
+ case 64:
+ case 128:
+ return NumVectors;
+ default:
+ break;
+ }
+ }
+
+ // There is no native support for FRem.
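+ // (FRem is lowered to a libcall such as fmod, hence LIBCALL_COST per
+ // scalar element.)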
+ if (Opcode == Instruction::FRem) {
+ unsigned Cost = (VF * LIBCALL_COST) + getScalarizationOverhead(Ty, Args);
+ // FIXME: VF 2 for float is currently just as expensive as for VF 4.
+ if (VF == 2 && ScalarBits == 32)
+ Cost *= 2;
+ return Cost;
+ }
+ }
+ else { // Scalar:
+ // These FP operations are supported with a dedicated instruction for
+ // float, double and fp128 (base implementation assumes float generally
+ // costs 2).
+ if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub ||
+ Opcode == Instruction::FMul || Opcode == Instruction::FDiv)
+ return 1;
+
+ // There is no native support for FRem.
+ if (Opcode == Instruction::FRem)
+ return LIBCALL_COST;
+
+ if (Opcode == Instruction::LShr || Opcode == Instruction::AShr)
+ return (ScalarBits >= 32 ? 1 : 2 /*ext*/);
+
+ // Or requires one instruction, although it has custom handling for i64.
+ if (Opcode == Instruction::Or)
+ return 1;
+
+ if (Opcode == Instruction::Xor && ScalarBits == 1)
+ // 2 * ipm sequences ; xor ; shift ; compare
+ return 7;
+
+ if (UDivPow2)
+ return 1;
+ if (SDivPow2)
+ return SDivCostEstimate;
+
+ // An extra extension for narrow types is needed.
+ if (Opcode == Instruction::SDiv || Opcode == Instruction::SRem)
+ // sext of op(s) for narrow types
+ return (ScalarBits < 32 ? 4 : (ScalarBits == 32 ? 2 : 1));
+
+ if (Opcode == Instruction::UDiv || Opcode == Instruction::URem)
+ // Clearing of low 64 bit reg + sext of op(s) for narrow types + dl[g]r
+ return (ScalarBits < 32 ? 4 : 2);
+ }
+
+ // Fallback to the default implementation.
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+ Opd1PropInfo, Opd2PropInfo, Args);
+}
+
+
+int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) {
+ assert (Tp->isVectorTy());
+ assert (ST->hasVector() && "getShuffleCost() called.");
+ unsigned NumVectors = getNumberOfParts(Tp);
+
+ // TODO: Since fp32 is expanded, the shuffle cost should always be 0.
+
+ // FP128 values are always in scalar registers, so there is no work
+ // involved with a shuffle, except for broadcast. In that case register
+ // moves are done with a single instruction per element.
+ if (Tp->getScalarType()->isFP128Ty())
+ return (Kind == TargetTransformInfo::SK_Broadcast ? NumVectors - 1 : 0);
+
+ switch (Kind) {
+ case TargetTransformInfo::SK_ExtractSubvector:
+ // ExtractSubvector Index indicates start offset.
+
+ // Extracting a subvector from first index is a noop.
+ return (Index == 0 ? 0 : NumVectors);
+
+ case TargetTransformInfo::SK_Broadcast:
+ // The loop vectorizer calls here to figure out the extra cost of
+ // broadcasting a loaded value to all elements of a vector. Since vlrep
+ // loads and replicates with a single instruction, adjust the returned
+ // value.
+ return NumVectors - 1;
+
+ default:
+
+ // SystemZ supports single instruction permutation / replication.
+ return NumVectors;
+ }
+
+ return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+}
+
+// Return the log2 difference of the element sizes of the two vector types.
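+// For example, between <4 x i32> and <4 x i8> the difference is
+// Log2_32(32) - Log2_32(8) == 2.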
+static unsigned getElSizeLog2Diff(Type *Ty0, Type *Ty1) {
+ unsigned Bits0 = Ty0->getScalarSizeInBits();
+ unsigned Bits1 = Ty1->getScalarSizeInBits();
+
+ if (Bits1 > Bits0)
+ return (Log2_32(Bits1) - Log2_32(Bits0));
+
+ return (Log2_32(Bits0) - Log2_32(Bits1));
+}
+
+// Return the number of instructions needed to truncate SrcTy to DstTy.
+unsigned SystemZTTIImpl::
+getVectorTruncCost(Type *SrcTy, Type *DstTy) {
+ assert (SrcTy->isVectorTy() && DstTy->isVectorTy());
+ assert (SrcTy->getPrimitiveSizeInBits() > DstTy->getPrimitiveSizeInBits() &&
+ "Packing must reduce size of vector type.");
+ assert (SrcTy->getVectorNumElements() == DstTy->getVectorNumElements() &&
+ "Packing should not change number of elements.");
+
+ // TODO: Since fp32 is expanded, the extract cost should always be 0.
+
+ unsigned NumParts = getNumberOfParts(SrcTy);
+ if (NumParts <= 2)
+ // Up to 2 vector registers can be truncated efficiently with pack or
+ // permute. The latter requires an immediate mask to be loaded, which
+ // typically gets hoisted out of a loop. TODO: return a good value for
+ // BB-VECTORIZER that includes the immediate loads, which we do not want
+ // to count for the loop vectorizer.
+ return 1;
+
+ unsigned Cost = 0;
+ unsigned Log2Diff = getElSizeLog2Diff(SrcTy, DstTy);
+ unsigned VF = SrcTy->getVectorNumElements();
+ for (unsigned P = 0; P < Log2Diff; ++P) {
+ if (NumParts > 1)
+ NumParts /= 2;
+ Cost += NumParts;
+ }
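+ // For example, truncating <16 x i32> (four vector parts) to <16 x i8>
+ // iterates twice (Log2Diff == 2) and costs 2 + 1 = 3 pack steps.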
+
+ // Currently, isel outputs a general mix of permute and pack instructions
+ // that follows the cost computation above, except for this case, which
+ // is one instruction less:
+ if (VF == 8 && SrcTy->getScalarSizeInBits() == 64 &&
+ DstTy->getScalarSizeInBits() == 8)
+ Cost--;
+
+ return Cost;
+}
+
+// Return the cost of converting a vector bitmask produced by a compare
+// (SrcTy), to the type of the select or extend instruction (DstTy).
+unsigned SystemZTTIImpl::
+getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy) {
+ assert (SrcTy->isVectorTy() && DstTy->isVectorTy() &&
+ "Should only be called with vector types.");
+
+ unsigned PackCost = 0;
+ unsigned SrcScalarBits = SrcTy->getScalarSizeInBits();
+ unsigned DstScalarBits = DstTy->getScalarSizeInBits();
+ unsigned Log2Diff = getElSizeLog2Diff(SrcTy, DstTy);
+ if (SrcScalarBits > DstScalarBits)
+ // The bitmask will be truncated.
+ PackCost = getVectorTruncCost(SrcTy, DstTy);
+ else if (SrcScalarBits < DstScalarBits) {
+ unsigned DstNumParts = getNumberOfParts(DstTy);
+ // Each vector select needs its part of the bitmask unpacked.
+ PackCost = Log2Diff * DstNumParts;
+ // Extra cost for moving part of mask before unpacking.
+ PackCost += DstNumParts - 1;
+ }
+
+ return PackCost;
+}
+
+// Return the type of the compared operands. This is needed to compute the
+// cost for a Select / ZExt or SExt instruction.
+static Type *getCmpOpsType(const Instruction *I, unsigned VF = 1) {
+ Type *OpTy = nullptr;
+ if (CmpInst *CI = dyn_cast<CmpInst>(I->getOperand(0)))
+ OpTy = CI->getOperand(0)->getType();
+ else if (Instruction *LogicI = dyn_cast<Instruction>(I->getOperand(0)))
+ if (LogicI->getNumOperands() == 2)
+ if (CmpInst *CI0 = dyn_cast<CmpInst>(LogicI->getOperand(0)))
+ if (isa<CmpInst>(LogicI->getOperand(1)))
+ OpTy = CI0->getOperand(0)->getType();
+
+ if (OpTy != nullptr) {
+ if (VF == 1) {
+ assert (!OpTy->isVectorTy() && "Expected scalar type");
+ return OpTy;
+ }
+ // Return the potentially vectorized type based on 'I' and 'VF'. 'I' may
+ // be either scalar or already vectorized with the same or a lesser VF.
+ Type *ElTy = OpTy->getScalarType();
+ return VectorType::get(ElTy, VF);
+ }
+
+ return nullptr;
+}
+
+int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I) {
+ unsigned DstScalarBits = Dst->getScalarSizeInBits();
+ unsigned SrcScalarBits = Src->getScalarSizeInBits();
+
+ if (Src->isVectorTy()) {
+ assert (ST->hasVector() && "getCastInstrCost() called with vector type.");
+ assert (Dst->isVectorTy());
+ unsigned VF = Src->getVectorNumElements();
+ unsigned NumDstVectors = getNumberOfParts(Dst);
+ unsigned NumSrcVectors = getNumberOfParts(Src);
+
+ if (Opcode == Instruction::Trunc) {
+ if (Src->getScalarSizeInBits() == Dst->getScalarSizeInBits())
+ return 0; // Check for NOOP conversions.
+ return getVectorTruncCost(Src, Dst);
+ }
+
+ if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt) {
+ if (SrcScalarBits >= 8) {
+ // ZExt/SExt will be handled with one unpack per doubling of width.
+ unsigned NumUnpacks = getElSizeLog2Diff(Src, Dst);
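+ // E.g. a sext of <4 x i16> to <4 x i32> is a single unpack (vuphh):
+ // NumUnpacks == 1, one instruction per destination vector.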
+
+ // For types that span multiple vector registers, some additional
+ // instructions are used to set up the unpacking.
+ unsigned NumSrcVectorOps =
+ (NumUnpacks > 1 ? (NumDstVectors - NumSrcVectors)
+ : (NumDstVectors / 2));
+
+ return (NumUnpacks * NumDstVectors) + NumSrcVectorOps;
+ }
+ else if (SrcScalarBits == 1) {
+ // This should be extension of a compare i1 result.
+ // If we know the widths of the compared operands, get the
+ // cost of converting it to Dst. Otherwise assume same widths.
+ unsigned Cost = 0;
+ Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I, VF) : nullptr);
+ if (CmpOpTy != nullptr)
+ Cost = getVectorBitmaskConversionCost(CmpOpTy, Dst);
+ if (Opcode == Instruction::ZExt)
+ // One 'vn' per dst vector with an immediate mask.
+ Cost += NumDstVectors;
+ return Cost;
+ }
+ }
+
+ if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP ||
+ Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI) {
+ // TODO: Fix base implementation which could simplify things a bit here
+ // (it seems to miss differentiating between scalar and vector types).
+
+ // Only 64 bit vector conversions are natively supported.
+ if (SrcScalarBits == 64 && DstScalarBits == 64)
+ return NumDstVectors;
+
+ // Return the cost of multiple scalar invocations plus the cost of
+ // inserting and extracting the values. Base implementation does not
+ // realize float->int gets scalarized.
+ unsigned ScalarCost = getCastInstrCost(Opcode, Dst->getScalarType(),
+ Src->getScalarType());
+ unsigned TotCost = VF * ScalarCost;
+ bool NeedsInserts = true, NeedsExtracts = true;
+ // FP128 registers do not get inserted or extracted.
+ if (DstScalarBits == 128 &&
+ (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP))
+ NeedsInserts = false;
+ if (SrcScalarBits == 128 &&
+ (Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI))
+ NeedsExtracts = false;
+
+ TotCost += getScalarizationOverhead(Dst, NeedsInserts, NeedsExtracts);
+
+ // FIXME: VF 2 for float<->i32 is currently just as expensive as for VF 4.
+ if (VF == 2 && SrcScalarBits == 32 && DstScalarBits == 32)
+ TotCost *= 2;
+
+ return TotCost;
+ }
+
+ if (Opcode == Instruction::FPTrunc) {
+ if (SrcScalarBits == 128) // fp128 -> double/float + inserts of elements.
+ return VF /*ldxbr/lexbr*/ + getScalarizationOverhead(Dst, true, false);
+ else // double -> float
+ return VF / 2 /*vledb*/ + std::max(1U, VF / 4 /*vperm*/);
+ }
+
+ if (Opcode == Instruction::FPExt) {
+ if (SrcScalarBits == 32 && DstScalarBits == 64) {
+ // float -> double is very rare and currently unoptimized. Instead of
+ // using vldeb, which can do two at a time, all conversions are
+ // scalarized.
+ return VF * 2;
+ }
+ // -> fp128. VF * lxdb/lxeb + extraction of elements.
+ return VF + getScalarizationOverhead(Src, false, true);
+ }
+ }
+ else { // Scalar
+ assert (!Dst->isVectorTy());
+
+ if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP)
+ return (SrcScalarBits >= 32 ? 1 : 2 /*i8/i16 extend*/);
+
+ if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
+ Src->isIntegerTy(1)) {
+ // This should be extension of a compare i1 result, which is done with
+ // ipm and a varying sequence of instructions.
+ unsigned Cost = 0;
+ if (Opcode == Instruction::SExt)
+ Cost = (DstScalarBits < 64 ? 3 : 4);
+ if (Opcode == Instruction::ZExt)
+ Cost = 3;
+ Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I) : nullptr);
+ if (CmpOpTy != nullptr && CmpOpTy->isFloatingPointTy())
+ // If operands of an fp type were compared, this costs +1.
+ Cost++;
+
+ return Cost;
+ }
+ }
+
+ return BaseT::getCastInstrCost(Opcode, Dst, Src, I);
+}
+
+int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ const Instruction *I) {
+ if (ValTy->isVectorTy()) {
+ assert (ST->hasVector() && "getCmpSelInstrCost() called with vector type.");
+ unsigned VF = ValTy->getVectorNumElements();
+
+ // Called with a compare instruction.
+ if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) {
+ unsigned PredicateExtraCost = 0;
+ if (I != nullptr) {
+ // Some predicates cost one or two extra instructions.
+ switch (cast<CmpInst>(I)->getPredicate()) {
+ case CmpInst::Predicate::ICMP_NE:
+ case CmpInst::Predicate::ICMP_UGE:
+ case CmpInst::Predicate::ICMP_ULE:
+ case CmpInst::Predicate::ICMP_SGE:
+ case CmpInst::Predicate::ICMP_SLE:
+ PredicateExtraCost = 1;
+ break;
+ case CmpInst::Predicate::FCMP_ONE:
+ case CmpInst::Predicate::FCMP_ORD:
+ case CmpInst::Predicate::FCMP_UEQ:
+ case CmpInst::Predicate::FCMP_UNO:
+ PredicateExtraCost = 2;
+ break;
+ default:
+ break;
+ }
+ }
+
+ // Float is handled with 2*vmr[lh]f + 2*vldeb + vfchdb for each pair of
+ // floats. FIXME: <2 x float> generates same code as <4 x float>.
+ unsigned CmpCostPerVector = (ValTy->getScalarType()->isFloatTy() ? 10 : 1);
+ unsigned NumVecs_cmp = getNumberOfParts(ValTy);
+
+ unsigned Cost = (NumVecs_cmp * (CmpCostPerVector + PredicateExtraCost));
+ return Cost;
+ }
+ else { // Called with a select instruction.
+ assert (Opcode == Instruction::Select);
+
+ // We can figure out the extra cost of packing / unpacking if the
+ // instruction was passed and the compare instruction is found.
+ unsigned PackCost = 0;
+ Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I, VF) : nullptr);
+ if (CmpOpTy != nullptr)
+ PackCost =
+ getVectorBitmaskConversionCost(CmpOpTy, ValTy);
+
+ return getNumberOfParts(ValTy) /*vsel*/ + PackCost;
+ }
+ }
+ else { // Scalar
+ switch (Opcode) {
+ case Instruction::ICmp: {
+ unsigned Cost = 1;
+ if (ValTy->isIntegerTy() && ValTy->getScalarSizeInBits() <= 16)
+ Cost += 2; // extend both operands
+ return Cost;
+ }
+ case Instruction::Select:
+ if (ValTy->isFloatingPointTy())
+ return 4; // No load on condition for FP, so this costs a conditional jump.
+ return 1; // Load On Condition.
+ }
+ }
+
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, nullptr);
+}
+
+int SystemZTTIImpl::
+getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
+ // vlvgp will insert two GRs into a vector register, so only count half the
+ // number of instructions.
+ if (Opcode == Instruction::InsertElement &&
+ Val->getScalarType()->isIntegerTy(64))
+ return ((Index % 2 == 0) ? 1 : 0);
+
+ if (Opcode == Instruction::ExtractElement) {
+ int Cost = ((Val->getScalarSizeInBits() == 1) ? 2 /*+test-under-mask*/ : 1);
+
+ // Give a slight penalty for moving out of the vector pipeline to the FXU unit.
+ if (Index == 0 && Val->getScalarType()->isIntegerTy())
+ Cost += 1;
+
+ return Cost;
+ }
+
+ return BaseT::getVectorInstrCost(Opcode, Val, Index);
+}
+
+int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment, unsigned AddressSpace,
+ const Instruction *I) {
+ assert(!Src->isVoidTy() && "Invalid type");
+
+ if (!Src->isVectorTy() && Opcode == Instruction::Load &&
+ I != nullptr && I->hasOneUse()) {
+ const Instruction *UserI = cast<Instruction>(*I->user_begin());
+ unsigned Bits = Src->getScalarSizeInBits();
+ bool FoldsLoad = false;
+ switch (UserI->getOpcode()) {
+ case Instruction::ICmp:
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ // This also makes sense for float operations, but is disabled for now due
+ // to regressions.
+ // case Instruction::FCmp:
+ // case Instruction::FAdd:
+ // case Instruction::FSub:
+ // case Instruction::FMul:
+ // case Instruction::FDiv:
+ FoldsLoad = (Bits == 32 || Bits == 64);
+ break;
+ }
+
+ if (FoldsLoad) {
+ assert (UserI->getNumOperands() == 2 &&
+ "Expected to only handle binops.");
+
+ // UserI can't fold two loads, so in that case return 0 cost only
+ // half of the time.
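+ // (Arbitrarily charge the cost to the load appearing as the second
+ // operand, so exactly one of the two loads is counted.)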
+ for (unsigned i = 0; i < 2; ++i) {
+ if (UserI->getOperand(i) == I)
+ continue;
+ if (LoadInst *LI = dyn_cast<LoadInst>(UserI->getOperand(i))) {
+ if (LI->hasOneUse())
+ return i == 0;
+ }
+ }
+
+ return 0;
+ }
+ }
+
+ unsigned NumOps = getNumberOfParts(Src);
+
+ if (Src->getScalarSizeInBits() == 128)
+ // 128 bit scalars are held in a pair of two 64 bit registers.
+ NumOps *= 2;
+
+ return NumOps;
+}
+
+int SystemZTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+ unsigned Factor,
+ ArrayRef<unsigned> Indices,
+ unsigned Alignment,
+ unsigned AddressSpace) {
+ assert(isa<VectorType>(VecTy) &&
+ "Expect a vector type for interleaved memory op");
+
+ unsigned WideBits = (VecTy->isPtrOrPtrVectorTy() ?
+ (64U * VecTy->getVectorNumElements()) : VecTy->getPrimitiveSizeInBits());
+ assert (WideBits > 0 && "Could not compute size of vector");
+ int NumWideParts =
+ ((WideBits % 128U) ? ((WideBits / 128U) + 1) : (WideBits / 128U));
+
+ // How many source vectors are handled to produce a vectorized operand?
+ int NumElsPerVector = (VecTy->getVectorNumElements() / NumWideParts);
+ int NumSrcParts =
+ ((NumWideParts > NumElsPerVector) ? NumElsPerVector : NumWideParts);
+
+ // A Load group may have gaps.
+ unsigned NumOperands =
+ ((Opcode == Instruction::Load) ? Indices.size() : Factor);
+
+ // Each needed permute takes two vectors as input.
+ if (NumSrcParts > 1)
+ NumSrcParts--;
+ int NumPermutes = NumSrcParts * NumOperands;
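+ // For instance, an interleaved load of <8 x i32> with Factor == 2 and
+ // both indices used gives NumWideParts == 2 and NumPermutes == 2, for a
+ // total cost of 4.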
+
+ // Cost of load/store operations and the permutations needed.
+ return NumWideParts + NumPermutes;
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
new file mode 100644
index 000000000000..6923fc6fc910
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -0,0 +1,91 @@
+//===-- SystemZTargetTransformInfo.h - SystemZ-specific TTI ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETTRANSFORMINFO_H
+
+#include "SystemZTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+
+namespace llvm {
+
+class SystemZTTIImpl : public BasicTTIImplBase<SystemZTTIImpl> {
+ typedef BasicTTIImplBase<SystemZTTIImpl> BaseT;
+ typedef TargetTransformInfo TTI;
+ friend BaseT;
+
+ const SystemZSubtarget *ST;
+ const SystemZTargetLowering *TLI;
+
+ const SystemZSubtarget *getST() const { return ST; }
+ const SystemZTargetLowering *getTLI() const { return TLI; }
+
+ unsigned const LIBCALL_COST = 30;
+
+public:
+ explicit SystemZTTIImpl(const SystemZTargetMachine *TM, const Function &F)
+ : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
+ TLI(ST->getTargetLowering()) {}
+
+ /// \name Scalar TTI Implementations
+ /// @{
+
+ int getIntImmCost(const APInt &Imm, Type *Ty);
+
+ int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
+ int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty);
+
+ TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
+
+ void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::UnrollingPreferences &UP);
+
+ /// @}
+
+ /// \name Vector TTI Implementations
+ /// @{
+
+ unsigned getNumberOfRegisters(bool Vector);
+ unsigned getRegisterBitWidth(bool Vector) const;
+
+ bool prefersVectorizedAddressing() { return false; }
+ bool supportsEfficientVectorElementLoadStore() { return true; }
+ bool enableInterleavedAccessVectorization() { return true; }
+
+ int getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty,
+ TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
+ TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
+ TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
+ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+ ArrayRef<const Value *> Args = ArrayRef<const Value *>());
+ int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
+ unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy);
+ unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy);
+ int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I = nullptr);
+ int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ const Instruction *I = nullptr);
+ int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
+ int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace, const Instruction *I = nullptr);
+
+ int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+ unsigned Factor,
+ ArrayRef<unsigned> Indices,
+ unsigned Alignment,
+ unsigned AddressSpace);
+ /// @}
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETTRANSFORMINFO_H
diff --git a/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp b/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
new file mode 100644
index 000000000000..d3c53a43b391
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
@@ -0,0 +1,23 @@
+//===-- SystemZTargetInfo.cpp - SystemZ target implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+Target &llvm::getTheSystemZTarget() {
+ static Target TheSystemZTarget;
+ return TheSystemZTarget;
+}
+
+extern "C" void LLVMInitializeSystemZTargetInfo() {
+ RegisterTarget<Triple::systemz, /*HasJIT=*/true> X(getTheSystemZTarget(),
+ "systemz", "SystemZ");
+}