Diffstat (limited to 'contrib/llvm/lib/Target/SystemZ')
39 files changed, 2990 insertions, 1068 deletions
diff --git a/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp index 9c995bf42b0b..3923614c89d3 100644 --- a/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp +++ b/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -13,9 +13,9 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCTargetAsmParser.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; @@ -391,6 +391,9 @@ public: : MCTargetAsmParser(Options, sti), Parser(parser) { MCAsmParserExtension::Initialize(Parser); + // Alias the .word directive to .short. + parser.addAliasForDirective(".word", ".short"); + // Initialize the set of available features. setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits())); } diff --git a/contrib/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/contrib/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp index bf67b75d5337..20e015b42d21 100644 --- a/contrib/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp +++ b/contrib/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #include "SystemZ.h" -#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSubtargetInfo.h" @@ -46,6 +46,34 @@ extern "C" void LLVMInitializeSystemZDisassembler() { createSystemZDisassembler); } +/// tryAddingSymbolicOperand - tries to add a symbolic operand in place of the +/// immediate Value in the MCInst. +/// +/// @param Value - The immediate Value, which has had any PC adjustment made by +/// the caller. +/// @param isBranch - If the instruction is a branch instruction +/// @param Address - The starting address of the instruction +/// @param Offset - The byte offset to this immediate in the instruction +/// @param Width - The byte width of this immediate in the instruction +/// +/// If the getOpInfo() function was set when setupForSymbolicDisassembly() was +/// called then that function is called to get any symbolic information for the +/// immediate in the instruction using the Address, Offset and Width. If that +/// returns non-zero then the symbolic information it returns is used to create +/// an MCExpr and that is added as an operand to the MCInst. If getOpInfo() +/// returns zero and isBranch is true then a symbol lookup for immediate Value +/// is done and if a symbol is found an MCExpr is created with that, else +/// an MCExpr with the immediate Value is created. This function returns true +/// if it adds an operand to the MCInst and false otherwise.
+static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch, + uint64_t Address, uint64_t Offset, + uint64_t Width, MCInst &MI, + const void *Decoder) { + const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder); + return Dis->tryAddingSymbolicOperand(MI, Value, Address, isBranch, + Offset, Width); +} + static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo, const unsigned *Regs, unsigned Size) { assert(RegNo < Size && "Invalid register"); @@ -206,22 +234,35 @@ static DecodeStatus decodeS32ImmOperand(MCInst &Inst, uint64_t Imm, template<unsigned N> static DecodeStatus decodePCDBLOperand(MCInst &Inst, uint64_t Imm, - uint64_t Address) { + uint64_t Address, + bool isBranch, + const void *Decoder) { assert(isUInt<N>(Imm) && "Invalid PC-relative offset"); - Inst.addOperand(MCOperand::createImm(SignExtend64<N>(Imm) * 2 + Address)); + uint64_t Value = SignExtend64<N>(Imm) * 2 + Address; + + if (!tryAddingSymbolicOperand(Value, isBranch, Address, 2, N / 8, + Inst, Decoder)) + Inst.addOperand(MCOperand::createImm(Value)); + return MCDisassembler::Success; } -static DecodeStatus decodePC16DBLOperand(MCInst &Inst, uint64_t Imm, - uint64_t Address, - const void *Decoder) { - return decodePCDBLOperand<16>(Inst, Imm, Address); +static DecodeStatus decodePC16DBLBranchOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, + const void *Decoder) { + return decodePCDBLOperand<16>(Inst, Imm, Address, true, Decoder); +} + +static DecodeStatus decodePC32DBLBranchOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, + const void *Decoder) { + return decodePCDBLOperand<32>(Inst, Imm, Address, true, Decoder); } static DecodeStatus decodePC32DBLOperand(MCInst &Inst, uint64_t Imm, uint64_t Address, const void *Decoder) { - return decodePCDBLOperand<32>(Inst, Imm, Address); + return decodePCDBLOperand<32>(Inst, Imm, Address, false, Decoder); } static DecodeStatus decodeBDAddr12Operand(MCInst &Inst, uint64_t Field, diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp index 57eebe19c044..c4d546cb7dff 100644 --- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp @@ -58,7 +58,8 @@ public: const MCAsmLayout &Layout) const override { return false; } - void relaxInstruction(const MCInst &Inst, MCInst &Res) const override { + void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, + MCInst &Res) const override { llvm_unreachable("SystemZ does not have assembler relaxation"); } bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override; diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp index ee1af023769e..368c95f7bac2 100644 --- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp @@ -24,8 +24,8 @@ public: protected: // Override MCELFObjectTargetWriter.
- unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, - bool IsPCRel) const override; + unsigned getRelocType(MCContext &Ctx, const MCValue &Target, + const MCFixup &Fixup, bool IsPCRel) const override; }; } // end anonymous namespace @@ -106,7 +106,8 @@ static unsigned getPLTReloc(unsigned Kind) { llvm_unreachable("Unsupported absolute address"); } -unsigned SystemZObjectWriter::GetRelocType(const MCValue &Target, +unsigned SystemZObjectWriter::getRelocType(MCContext &Ctx, + const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const { MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant(); diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp index 2115d4480eef..e16ba9e15317 100644 --- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp @@ -10,7 +10,6 @@ #include "SystemZMCTargetDesc.h" #include "InstPrinter/SystemZInstPrinter.h" #include "SystemZMCAsmInfo.h" -#include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" @@ -159,17 +158,8 @@ createSystemZMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { return createSystemZMCSubtargetInfoImpl(TT, CPU, FS); } -static MCCodeGenInfo *createSystemZMCCodeGenInfo(const Triple &TT, - Reloc::Model RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) { - MCCodeGenInfo *X = new MCCodeGenInfo(); - - // Static code is suitable for use in a dynamic executable; there is no - // separate DynamicNoPIC model. - if (RM == Reloc::Default || RM == Reloc::DynamicNoPIC) - RM = Reloc::Static; - +static void adjustCodeGenOpts(const Triple &TT, Reloc::Model RM, + CodeModel::Model &CM) { // For SystemZ we define the models as follows: // // Small: BRASL can call any function and will use a stub if necessary. @@ -203,8 +193,6 @@ static MCCodeGenInfo *createSystemZMCCodeGenInfo(const Triple &TT, CM = CodeModel::Small; else if (CM == CodeModel::JITDefault) CM = RM == Reloc::PIC_ ? CodeModel::Small : CodeModel::Medium; - X->initMCCodeGenInfo(RM, CM, OL); - return X; } static MCInstPrinter *createSystemZMCInstPrinter(const Triple &T, @@ -220,9 +208,9 @@ extern "C" void LLVMInitializeSystemZTargetMC() { TargetRegistry::RegisterMCAsmInfo(TheSystemZTarget, createSystemZMCAsmInfo); - // Register the MCCodeGenInfo. - TargetRegistry::RegisterMCCodeGenInfo(TheSystemZTarget, - createSystemZMCCodeGenInfo); + // Register the adjustCodeGenOpts. + TargetRegistry::registerMCAdjustCodeGenOpts(TheSystemZTarget, + adjustCodeGenOpts); // Register the MCCodeEmitter. TargetRegistry::RegisterMCCodeEmitter(TheSystemZTarget, diff --git a/contrib/llvm/lib/Target/SystemZ/README.txt b/contrib/llvm/lib/Target/SystemZ/README.txt index cd367d60bab7..86a1322c9e23 100644 --- a/contrib/llvm/lib/Target/SystemZ/README.txt +++ b/contrib/llvm/lib/Target/SystemZ/README.txt @@ -7,13 +7,6 @@ for later architectures at some point. -- -SystemZDAGToDAGISel::SelectInlineAsmMemoryOperand() is passed "m" for all -inline asm memory constraints; it doesn't get to see the original constraint. -This means that it must conservatively treat all inline asm constraints -as the most restricted type, "R". - --- - If an inline asm ties an i32 "r" result to an i64 input, the input will be treated as an i32, leaving the upper bits uninitialised. 
For example: @@ -43,15 +36,6 @@ We don't use the BRANCH ON INDEX instructions. -- -We might want to use BRANCH ON CONDITION for conditional indirect calls -and conditional returns. - --- - -We don't use the TEST DATA CLASS instructions. - --- - We only use MVC, XC and CLC for constant-length block operations. We could extend them to variable-length operations too, using EXECUTE RELATIVE LONG. @@ -79,11 +63,6 @@ via a register.) -- -We don't use the halfword forms of LOAD REVERSED and STORE REVERSED -(LRVH and STRVH). - --- - We don't use ICM or STCM. -- @@ -123,7 +102,7 @@ ought to be implemented as: ngr %r2, %r0 br %r14 -but two-address optimisations reverse the order of the AND and force: +but two-address optimizations reverse the order of the AND and force: lhi %r0, 1 ngr %r0, %r2 @@ -166,3 +145,10 @@ If needed, we can support 16-byte atomics using LPQ, STPQ and CSDG. We might want to model all access registers and use them to spill 32-bit values. + +-- + +We might want to use the 'overflow' condition of e.g. AR to support +llvm.sadd.with.overflow.i32 and related intrinsics - the generated code +for signed overflow checks is currently quite bad. This would improve +the results of using -ftrapv. diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZ.h b/contrib/llvm/lib/Target/SystemZ/SystemZ.h index cafe2c5948c4..c8ea9641fb62 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZ.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZ.h @@ -87,6 +87,11 @@ const unsigned CCMASK_VCMP_MIXED = CCMASK_1; const unsigned CCMASK_VCMP_NONE = CCMASK_3; const unsigned CCMASK_VCMP = CCMASK_0 | CCMASK_1 | CCMASK_3; +// Condition-code mask assignments for Test Data Class. +const unsigned CCMASK_TDC_NOMATCH = CCMASK_0; +const unsigned CCMASK_TDC_MATCH = CCMASK_1; +const unsigned CCMASK_TDC = CCMASK_TDC_NOMATCH | CCMASK_TDC_MATCH; + // The position of the low CC bit in an IPM result. const unsigned IPM_CC = 28; @@ -94,6 +99,41 @@ const unsigned IPM_CC = 28; const unsigned PFD_READ = 1; const unsigned PFD_WRITE = 2; +// Mask assignments for TDC +const unsigned TDCMASK_ZERO_PLUS = 0x800; +const unsigned TDCMASK_ZERO_MINUS = 0x400; +const unsigned TDCMASK_NORMAL_PLUS = 0x200; +const unsigned TDCMASK_NORMAL_MINUS = 0x100; +const unsigned TDCMASK_SUBNORMAL_PLUS = 0x080; +const unsigned TDCMASK_SUBNORMAL_MINUS = 0x040; +const unsigned TDCMASK_INFINITY_PLUS = 0x020; +const unsigned TDCMASK_INFINITY_MINUS = 0x010; +const unsigned TDCMASK_QNAN_PLUS = 0x008; +const unsigned TDCMASK_QNAN_MINUS = 0x004; +const unsigned TDCMASK_SNAN_PLUS = 0x002; +const unsigned TDCMASK_SNAN_MINUS = 0x001; + +const unsigned TDCMASK_ZERO = TDCMASK_ZERO_PLUS | TDCMASK_ZERO_MINUS; +const unsigned TDCMASK_POSITIVE = TDCMASK_NORMAL_PLUS | +​ TDCMASK_SUBNORMAL_PLUS | + TDCMASK_INFINITY_PLUS; +const unsigned TDCMASK_NEGATIVE = TDCMASK_NORMAL_MINUS | + TDCMASK_SUBNORMAL_MINUS | + TDCMASK_INFINITY_MINUS; +const unsigned TDCMASK_NAN = TDCMASK_QNAN_PLUS | + TDCMASK_QNAN_MINUS | + TDCMASK_SNAN_PLUS | + TDCMASK_SNAN_MINUS; +const unsigned TDCMASK_PLUS = TDCMASK_POSITIVE | + TDCMASK_ZERO_PLUS | + TDCMASK_QNAN_PLUS | + TDCMASK_SNAN_PLUS; +const unsigned TDCMASK_MINUS = TDCMASK_NEGATIVE | + TDCMASK_ZERO_MINUS | + TDCMASK_QNAN_MINUS | + TDCMASK_SNAN_MINUS; +const unsigned TDCMASK_ALL = TDCMASK_PLUS | TDCMASK_MINUS; + // Number of bits in a vector register.
const unsigned VectorBits = 128; @@ -138,6 +178,7 @@ FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM); FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM); FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM); FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM); +FunctionPass *createSystemZTDCPass(); } // end namespace llvm #endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp index 75273114d62f..9c0f327ff744 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -109,6 +109,85 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) { LoweredMI = MCInstBuilder(SystemZ::BR).addReg(SystemZ::R14D); break; + case SystemZ::CondReturn: + LoweredMI = MCInstBuilder(SystemZ::BCR) + .addImm(MI->getOperand(0).getImm()) + .addImm(MI->getOperand(1).getImm()) + .addReg(SystemZ::R14D); + break; + + case SystemZ::CRBReturn: + LoweredMI = MCInstBuilder(SystemZ::CRB) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addImm(MI->getOperand(2).getImm()) + .addReg(SystemZ::R14D) + .addImm(0); + break; + + case SystemZ::CGRBReturn: + LoweredMI = MCInstBuilder(SystemZ::CGRB) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addImm(MI->getOperand(2).getImm()) + .addReg(SystemZ::R14D) + .addImm(0); + break; + + case SystemZ::CIBReturn: + LoweredMI = MCInstBuilder(SystemZ::CIB) + .addReg(MI->getOperand(0).getReg()) + .addImm(MI->getOperand(1).getImm()) + .addImm(MI->getOperand(2).getImm()) + .addReg(SystemZ::R14D) + .addImm(0); + break; + + case SystemZ::CGIBReturn: + LoweredMI = MCInstBuilder(SystemZ::CGIB) + .addReg(MI->getOperand(0).getReg()) + .addImm(MI->getOperand(1).getImm()) + .addImm(MI->getOperand(2).getImm()) + .addReg(SystemZ::R14D) + .addImm(0); + break; + + case SystemZ::CLRBReturn: + LoweredMI = MCInstBuilder(SystemZ::CLRB) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addImm(MI->getOperand(2).getImm()) + .addReg(SystemZ::R14D) + .addImm(0); + break; + + case SystemZ::CLGRBReturn: + LoweredMI = MCInstBuilder(SystemZ::CLGRB) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addImm(MI->getOperand(2).getImm()) + .addReg(SystemZ::R14D) + .addImm(0); + break; + + case SystemZ::CLIBReturn: + LoweredMI = MCInstBuilder(SystemZ::CLIB) + .addReg(MI->getOperand(0).getReg()) + .addImm(MI->getOperand(1).getImm()) + .addImm(MI->getOperand(2).getImm()) + .addReg(SystemZ::R14D) + .addImm(0); + break; + + case SystemZ::CLGIBReturn: + LoweredMI = MCInstBuilder(SystemZ::CLGIB) + .addReg(MI->getOperand(0).getReg()) + .addImm(MI->getOperand(1).getImm()) + .addImm(MI->getOperand(2).getImm()) + .addReg(SystemZ::R14D) + .addImm(0); + break; + case SystemZ::CallBRASL: LoweredMI = MCInstBuilder(SystemZ::BRASL) .addReg(SystemZ::R14D) @@ -126,10 +205,96 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) { .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_PLT)); break; + case SystemZ::CallBRCL: + LoweredMI = MCInstBuilder(SystemZ::BRCL) + .addImm(MI->getOperand(0).getImm()) + .addImm(MI->getOperand(1).getImm()) + .addExpr(Lower.getExpr(MI->getOperand(2), MCSymbolRefExpr::VK_PLT)); + break; + case SystemZ::CallBR: LoweredMI = MCInstBuilder(SystemZ::BR).addReg(SystemZ::R1D); break; + case SystemZ::CallBCR: + LoweredMI = MCInstBuilder(SystemZ::BCR) + 
.addImm(MI->getOperand(0).getImm()) + .addImm(MI->getOperand(1).getImm()) + .addReg(SystemZ::R1D); + break; + + case SystemZ::CRBCall: + LoweredMI = MCInstBuilder(SystemZ::CRB) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addImm(MI->getOperand(2).getImm()) + .addReg(SystemZ::R1D) + .addImm(0); + break; + + case SystemZ::CGRBCall: + LoweredMI = MCInstBuilder(SystemZ::CGRB) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addImm(MI->getOperand(2).getImm()) + .addReg(SystemZ::R1D) + .addImm(0); + break; + + case SystemZ::CIBCall: + LoweredMI = MCInstBuilder(SystemZ::CIB) + .addReg(MI->getOperand(0).getReg()) + .addImm(MI->getOperand(1).getImm()) + .addImm(MI->getOperand(2).getImm()) + .addReg(SystemZ::R1D) + .addImm(0); + break; + + case SystemZ::CGIBCall: + LoweredMI = MCInstBuilder(SystemZ::CGIB) + .addReg(MI->getOperand(0).getReg()) + .addImm(MI->getOperand(1).getImm()) + .addImm(MI->getOperand(2).getImm()) + .addReg(SystemZ::R1D) + .addImm(0); + break; + + case SystemZ::CLRBCall: + LoweredMI = MCInstBuilder(SystemZ::CLRB) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addImm(MI->getOperand(2).getImm()) + .addReg(SystemZ::R1D) + .addImm(0); + break; + + case SystemZ::CLGRBCall: + LoweredMI = MCInstBuilder(SystemZ::CLGRB) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addImm(MI->getOperand(2).getImm()) + .addReg(SystemZ::R1D) + .addImm(0); + break; + + case SystemZ::CLIBCall: + LoweredMI = MCInstBuilder(SystemZ::CLIB) + .addReg(MI->getOperand(0).getReg()) + .addImm(MI->getOperand(1).getImm()) + .addImm(MI->getOperand(2).getImm()) + .addReg(SystemZ::R1D) + .addImm(0); + break; + + case SystemZ::CLGIBCall: + LoweredMI = MCInstBuilder(SystemZ::CLGIB) + .addReg(MI->getOperand(0).getReg()) + .addImm(MI->getOperand(1).getImm()) + .addImm(MI->getOperand(2).getImm()) + .addReg(SystemZ::R1D) + .addImm(0); + break; + case SystemZ::TLS_GDCALL: LoweredMI = MCInstBuilder(SystemZ::BRASL) .addReg(SystemZ::R14D) @@ -260,6 +425,41 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) { .addImm(15).addReg(SystemZ::R0D); break; + // Emit nothing here but a comment if we can. + case SystemZ::MemBarrier: + OutStreamer->emitRawComment("MEMBARRIER"); + return; + + // We want to emit "j .+2" for traps, jumping to the relative immediate field + // of the jump instruction, which decodes as an illegal instruction. We cannot emit a + // "." symbol, so create and emit a temp label before the instruction and use + // that instead. + case SystemZ::Trap: { + MCSymbol *DotSym = OutContext.createTempSymbol(); + OutStreamer->EmitLabel(DotSym); + + const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(DotSym, OutContext); + const MCConstantExpr *ConstExpr = MCConstantExpr::create(2, OutContext); + LoweredMI = MCInstBuilder(SystemZ::J) + .addExpr(MCBinaryExpr::createAdd(Expr, ConstExpr, OutContext)); + } + break; + + // Conditional traps will create a branch on condition instruction that jumps + to the relative immediate field of the jump instruction. (e.g. "jo .+2")
+ case SystemZ::CondTrap: { + MCSymbol *DotSym = OutContext.createTempSymbol(); + OutStreamer->EmitLabel(DotSym); + + const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(DotSym, OutContext); + const MCConstantExpr *ConstExpr = MCConstantExpr::create(2, OutContext); + LoweredMI = MCInstBuilder(SystemZ::BRC) + .addImm(MI->getOperand(0).getImm()) + .addImm(MI->getOperand(1).getImm()) + .addExpr(MCBinaryExpr::createAdd(Expr, ConstExpr, OutContext)); + } + break; + default: Lower.lower(MI, LoweredMI); break; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp index cc9c84b6a058..72da51f74b10 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp @@ -12,10 +12,10 @@ using namespace llvm; -const unsigned SystemZ::ArgGPRs[SystemZ::NumArgGPRs] = { +const MCPhysReg SystemZ::ArgGPRs[SystemZ::NumArgGPRs] = { SystemZ::R2D, SystemZ::R3D, SystemZ::R4D, SystemZ::R5D, SystemZ::R6D }; -const unsigned SystemZ::ArgFPRs[SystemZ::NumArgFPRs] = { +const MCPhysReg SystemZ::ArgFPRs[SystemZ::NumArgFPRs] = { SystemZ::F0D, SystemZ::F2D, SystemZ::F4D, SystemZ::F6D }; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.h b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.h index bff0706618aa..b5523e586f4c 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.h @@ -12,14 +12,15 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/MC/MCRegisterInfo.h" namespace llvm { namespace SystemZ { const unsigned NumArgGPRs = 5; - extern const unsigned ArgGPRs[NumArgGPRs]; + extern const MCPhysReg ArgGPRs[NumArgGPRs]; const unsigned NumArgFPRs = 4; - extern const unsigned ArgFPRs[NumArgFPRs]; + extern const MCPhysReg ArgFPRs[NumArgFPRs]; } // end namespace SystemZ class SystemZCCState : public CCState { @@ -79,6 +80,51 @@ public: bool IsShortVector(unsigned ValNo) { return ArgIsShortVector[ValNo]; } }; +// Handle i128 argument types. These need to be passed by implicit +// reference. This could be as simple as the following .td line: +// CCIfType<[i128], CCPassIndirect<i64>>, +// except that i128 is not a legal type, and therefore gets split by +// common code into a pair of i64 arguments. +inline bool CC_SystemZ_I128Indirect(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs(); + + // ArgFlags.isSplit() is true on the first part of an i128 argument; + // PendingMembers.empty() is false on all subsequent parts. + if (!ArgFlags.isSplit() && PendingMembers.empty()) + return false; + + // Push a pending Indirect value location for each part. + LocVT = MVT::i64; + LocInfo = CCValAssign::Indirect; + PendingMembers.push_back(CCValAssign::getPending(ValNo, ValVT, + LocVT, LocInfo)); + if (!ArgFlags.isSplitEnd()) + return true; + + // OK, we've collected all parts in the pending list. Allocate + // the location (register or stack slot) for the indirect pointer. + // (This duplicates the usual i64 calling convention rules.) + unsigned Reg = State.AllocateReg(SystemZ::ArgGPRs); + unsigned Offset = Reg ? 0 : State.AllocateStack(8, 8); + + // Use that same location for all the pending parts.
+ for (auto &It : PendingMembers) { + if (Reg) + It.convertToReg(Reg); + else + It.convertToMem(Offset); + State.addLoc(It); + } + + PendingMembers.clear(); + + return true; +} + } // end namespace llvm #endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td index bdd1b1598adb..2bf5ac29865f 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td @@ -33,6 +33,9 @@ def RetCC_SystemZ : CallingConv<[ // Promote i32 to i64 if it has an explicit extension type. CCIfType<[i32], CCIfExtend<CCPromoteToType<i64>>>, + // A SwiftError is returned in R9. + CCIfSwiftError<CCIfType<[i64], CCAssignToReg<[R9D]>>>, + // ABI-compliant code returns 64-bit integers in R2. Make the other // call-clobbered argument registers available for code that doesn't // care about the ABI. (R6 is an argument register too, but is @@ -65,8 +68,17 @@ def CC_SystemZ : CallingConv<[ // are smaller than 64 bits shouldn't. CCIfType<[i32], CCIfExtend<CCPromoteToType<i64>>>, + // A SwiftSelf is passed in callee-saved R10. + CCIfSwiftSelf<CCIfType<[i64], CCAssignToReg<[R10D]>>>, + + // A SwiftError is passed in callee-saved R9. + CCIfSwiftError<CCIfType<[i64], CCAssignToReg<[R9D]>>>, + // Force long double values to the stack and pass i64 pointers to them. CCIfType<[f128], CCPassIndirect<i64>>, + // Same for i128 values. These are already split into two i64 here, + // so we have to use a custom handler. + CCIfType<[i64], CCCustom<"CC_SystemZ_I128Indirect">>, // The first 5 integer arguments are passed in R2-R6. Note that R6 // is call-saved. @@ -105,3 +117,6 @@ def CC_SystemZ : CallingConv<[ //===----------------------------------------------------------------------===// def CSR_SystemZ : CalleeSavedRegs<(add (sequence "R%dD", 6, 15), (sequence "F%dD", 8, 15))>; + +// R9 is used to return SwiftError; remove it from CSR. 
+def CSR_SystemZ_SwiftError : CalleeSavedRegs<(sub CSR_SystemZ, R9D)>; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp index 4818ed015522..27350b88554d 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp @@ -18,7 +18,6 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/IR/Function.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -65,18 +64,22 @@ public: bool processBlock(MachineBasicBlock &MBB); bool runOnMachineFunction(MachineFunction &F) override; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::AllVRegsAllocated); + } private: - Reference getRegReferences(MachineInstr *MI, unsigned Reg); - bool convertToBRCT(MachineInstr *MI, MachineInstr *Compare, + Reference getRegReferences(MachineInstr &MI, unsigned Reg); + bool convertToBRCT(MachineInstr &MI, MachineInstr &Compare, SmallVectorImpl<MachineInstr *> &CCUsers); - bool convertToLoadAndTest(MachineInstr *MI); - bool adjustCCMasksForInstr(MachineInstr *MI, MachineInstr *Compare, + bool convertToLoadAndTest(MachineInstr &MI); + bool adjustCCMasksForInstr(MachineInstr &MI, MachineInstr &Compare, SmallVectorImpl<MachineInstr *> &CCUsers); - bool optimizeCompareZero(MachineInstr *Compare, + bool optimizeCompareZero(MachineInstr &Compare, SmallVectorImpl<MachineInstr *> &CCUsers); - bool fuseCompareAndBranch(MachineInstr *Compare, - SmallVectorImpl<MachineInstr *> &CCUsers); + bool fuseCompareOperations(MachineInstr &Compare, + SmallVectorImpl<MachineInstr *> &CCUsers); const SystemZInstrInfo *TII; const TargetRegisterInfo *TRI; @@ -98,14 +101,12 @@ static bool isCCLiveOut(MachineBasicBlock &MBB) { } // Return true if any CC result of MI would reflect the value of Reg. -static bool resultTests(MachineInstr *MI, unsigned Reg) { - if (MI->getNumOperands() > 0 && - MI->getOperand(0).isReg() && - MI->getOperand(0).isDef() && - MI->getOperand(0).getReg() == Reg) +static bool resultTests(MachineInstr &MI, unsigned Reg) { + if (MI.getNumOperands() > 0 && MI.getOperand(0).isReg() && + MI.getOperand(0).isDef() && MI.getOperand(0).getReg() == Reg) return true; - switch (MI->getOpcode()) { + switch (MI.getOpcode()) { case SystemZ::LR: case SystemZ::LGR: case SystemZ::LGFR: @@ -118,7 +119,7 @@ static bool resultTests(MachineInstr *MI, unsigned Reg) { case SystemZ::LTEBR: case SystemZ::LTDBR: case SystemZ::LTXBR: - if (MI->getOperand(1).getReg() == Reg) + if (MI.getOperand(1).getReg() == Reg) return true; } @@ -126,10 +127,10 @@ static bool resultTests(MachineInstr *MI, unsigned Reg) { } // Describe the references to Reg or any of its aliases in MI. 
-Reference SystemZElimCompare::getRegReferences(MachineInstr *MI, unsigned Reg) { +Reference SystemZElimCompare::getRegReferences(MachineInstr &MI, unsigned Reg) { Reference Ref; - for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { - const MachineOperand &MO = MI->getOperand(I); + for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { + const MachineOperand &MO = MI.getOperand(I); if (MO.isReg()) { if (unsigned MOReg = MO.getReg()) { if (TRI->regsOverlap(MOReg, Reg)) { @@ -146,23 +147,23 @@ Reference SystemZElimCompare::getRegReferences(MachineInstr *MI, unsigned Reg) { // Return true if this is a load and test which can be optimized the // same way as compare instruction. -static bool isLoadAndTestAsCmp(MachineInstr *MI) { +static bool isLoadAndTestAsCmp(MachineInstr &MI) { // If we during isel used a load-and-test as a compare with 0, the // def operand is dead. - return ((MI->getOpcode() == SystemZ::LTEBR || - MI->getOpcode() == SystemZ::LTDBR || - MI->getOpcode() == SystemZ::LTXBR) && - MI->getOperand(0).isDead()); + return (MI.getOpcode() == SystemZ::LTEBR || + MI.getOpcode() == SystemZ::LTDBR || + MI.getOpcode() == SystemZ::LTXBR) && + MI.getOperand(0).isDead(); } // Return the source register of Compare, which is the unknown value // being tested. -static unsigned getCompareSourceReg(MachineInstr *Compare) { +static unsigned getCompareSourceReg(MachineInstr &Compare) { unsigned reg = 0; - if (Compare->isCompare()) - reg = Compare->getOperand(0).getReg(); + if (Compare.isCompare()) + reg = Compare.getOperand(0).getReg(); else if (isLoadAndTestAsCmp(Compare)) - reg = Compare->getOperand(1).getReg(); + reg = Compare.getOperand(1).getReg(); assert (reg); return reg; @@ -171,11 +172,11 @@ static unsigned getCompareSourceReg(MachineInstr *Compare) { // Compare compares the result of MI against zero. If MI is an addition // of -1 and if CCUsers is a single branch on nonzero, eliminate the addition // and convert the branch to a BRCT(G). Return true on success. -bool -SystemZElimCompare::convertToBRCT(MachineInstr *MI, MachineInstr *Compare, - SmallVectorImpl<MachineInstr *> &CCUsers) { +bool SystemZElimCompare::convertToBRCT( + MachineInstr &MI, MachineInstr &Compare, + SmallVectorImpl<MachineInstr *> &CCUsers) { // Check whether we have an addition of -1. - unsigned Opcode = MI->getOpcode(); + unsigned Opcode = MI.getOpcode(); unsigned BRCT; if (Opcode == SystemZ::AHI) BRCT = SystemZ::BRCT; @@ -183,7 +184,7 @@ SystemZElimCompare::convertToBRCT(MachineInstr *MI, MachineInstr *Compare, BRCT = SystemZ::BRCTG; else return false; - if (MI->getOperand(2).getImm() != -1) + if (MI.getOperand(2).getImm() != -1) return false; // Check whether we have a single JLH. @@ -201,7 +202,7 @@ SystemZElimCompare::convertToBRCT(MachineInstr *MI, MachineInstr *Compare, unsigned SrcReg = getCompareSourceReg(Compare); MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch; for (++MBBI; MBBI != MBBE; ++MBBI) - if (getRegReferences(MBBI, SrcReg)) + if (getRegReferences(*MBBI, SrcReg)) return false; // The transformation is OK. Rebuild Branch as a BRCT(G). 
@@ -210,24 +211,24 @@ SystemZElimCompare::convertToBRCT(MachineInstr *MI, MachineInstr *Compare, Branch->RemoveOperand(0); Branch->setDesc(TII->get(BRCT)); MachineInstrBuilder(*Branch->getParent()->getParent(), Branch) - .addOperand(MI->getOperand(0)) - .addOperand(MI->getOperand(1)) - .addOperand(Target) - .addReg(SystemZ::CC, RegState::ImplicitDefine); - MI->eraseFromParent(); + .addOperand(MI.getOperand(0)) + .addOperand(MI.getOperand(1)) + .addOperand(Target) + .addReg(SystemZ::CC, RegState::ImplicitDefine | RegState::Dead); + MI.eraseFromParent(); return true; } // If MI is a load instruction, try to convert it into a LOAD AND TEST. // Return true on success. -bool SystemZElimCompare::convertToLoadAndTest(MachineInstr *MI) { - unsigned Opcode = TII->getLoadAndTest(MI->getOpcode()); +bool SystemZElimCompare::convertToLoadAndTest(MachineInstr &MI) { + unsigned Opcode = TII->getLoadAndTest(MI.getOpcode()); if (!Opcode) return false; - MI->setDesc(TII->get(Opcode)); - MachineInstrBuilder(*MI->getParent()->getParent(), MI) - .addReg(SystemZ::CC, RegState::ImplicitDefine); + MI.setDesc(TII->get(Opcode)); + MachineInstrBuilder(*MI.getParent()->getParent(), MI) + .addReg(SystemZ::CC, RegState::ImplicitDefine); return true; } @@ -236,10 +237,10 @@ bool SystemZElimCompare::convertToLoadAndTest(MachineInstr *MI) { // would also reflect the value of X. Try to adjust CCUsers so that // they test the result of MI directly, returning true on success. // Leave everything unchanged on failure. -bool SystemZElimCompare:: -adjustCCMasksForInstr(MachineInstr *MI, MachineInstr *Compare, - SmallVectorImpl<MachineInstr *> &CCUsers) { - int Opcode = MI->getOpcode(); +bool SystemZElimCompare::adjustCCMasksForInstr( + MachineInstr &MI, MachineInstr &Compare, + SmallVectorImpl<MachineInstr *> &CCUsers) { + int Opcode = MI.getOpcode(); const MCInstrDesc &Desc = TII->get(Opcode); unsigned MIFlags = Desc.TSFlags; @@ -247,7 +248,7 @@ adjustCCMasksForInstr(MachineInstr *MI, MachineInstr *Compare, unsigned ReusableCCMask = SystemZII::getCompareZeroCCMask(MIFlags); // For unsigned comparisons with zero, only equality makes sense. - unsigned CompareFlags = Compare->getDesc().TSFlags; + unsigned CompareFlags = Compare.getDesc().TSFlags; if (CompareFlags & SystemZII::IsLogical) ReusableCCMask &= SystemZ::CCMASK_CMP_EQ; @@ -296,9 +297,9 @@ adjustCCMasksForInstr(MachineInstr *MI, MachineInstr *Compare, } // CC is now live after MI. - int CCDef = MI->findRegisterDefOperandIdx(SystemZ::CC, false, true, TRI); + int CCDef = MI.findRegisterDefOperandIdx(SystemZ::CC, false, true, TRI); assert(CCDef >= 0 && "Couldn't find CC set"); - MI->getOperand(CCDef).setIsDead(false); + MI.getOperand(CCDef).setIsDead(false); // Clear any intervening kills of CC. MachineBasicBlock::iterator MBBI = MI, MBBE = Compare; @@ -309,8 +310,8 @@ adjustCCMasksForInstr(MachineInstr *MI, MachineInstr *Compare, } // Return true if Compare is a comparison against zero. 
-static bool isCompareZero(MachineInstr *Compare) { - switch (Compare->getOpcode()) { +static bool isCompareZero(MachineInstr &Compare) { + switch (Compare.getOpcode()) { case SystemZ::LTEBRCompare: case SystemZ::LTDBRCompare: case SystemZ::LTXBRCompare: @@ -321,9 +322,8 @@ static bool isCompareZero(MachineInstr *Compare) { if (isLoadAndTestAsCmp(Compare)) return true; - return (Compare->getNumExplicitOperands() == 2 && - Compare->getOperand(1).isImm() && - Compare->getOperand(1).getImm() == 0); + return Compare.getNumExplicitOperands() == 2 && + Compare.getOperand(1).isImm() && Compare.getOperand(1).getImm() == 0; } } @@ -331,21 +331,20 @@ static bool isCompareZero(MachineInstr *Compare) { // a value against zero. Return true on success and if Compare should be // deleted as dead. CCUsers is the list of instructions that use the CC // value produced by Compare. -bool SystemZElimCompare:: -optimizeCompareZero(MachineInstr *Compare, - SmallVectorImpl<MachineInstr *> &CCUsers) { +bool SystemZElimCompare::optimizeCompareZero( + MachineInstr &Compare, SmallVectorImpl<MachineInstr *> &CCUsers) { if (!isCompareZero(Compare)) return false; // Search back for CC results that are based on the first operand. unsigned SrcReg = getCompareSourceReg(Compare); - MachineBasicBlock &MBB = *Compare->getParent(); + MachineBasicBlock &MBB = *Compare.getParent(); MachineBasicBlock::iterator MBBI = Compare, MBBE = MBB.begin(); Reference CCRefs; Reference SrcRefs; while (MBBI != MBBE) { --MBBI; - MachineInstr *MI = MBBI; + MachineInstr &MI = *MBBI; if (resultTests(MI, SrcReg)) { // Try to remove both MI and Compare by converting a branch to BRCT(G). // We don't care in this case whether CC is modified between MI and @@ -373,54 +372,85 @@ optimizeCompareZero(MachineInstr *Compare, // Try to fuse comparison instruction Compare into a later branch. // Return true on success and if Compare is therefore redundant. -bool SystemZElimCompare:: -fuseCompareAndBranch(MachineInstr *Compare, - SmallVectorImpl<MachineInstr *> &CCUsers) { - // See whether we have a comparison that can be fused. - unsigned FusedOpcode = TII->getCompareAndBranch(Compare->getOpcode(), - Compare); - if (!FusedOpcode) - return false; - +bool SystemZElimCompare::fuseCompareOperations( + MachineInstr &Compare, SmallVectorImpl<MachineInstr *> &CCUsers) { // See whether we have a single branch with which to fuse. if (CCUsers.size() != 1) return false; MachineInstr *Branch = CCUsers[0]; - if (Branch->getOpcode() != SystemZ::BRC) + SystemZII::FusedCompareType Type; + switch (Branch->getOpcode()) { + case SystemZ::BRC: + Type = SystemZII::CompareAndBranch; + break; + case SystemZ::CondReturn: + Type = SystemZII::CompareAndReturn; + break; + case SystemZ::CallBCR: + Type = SystemZII::CompareAndSibcall; + break; + case SystemZ::CondTrap: + Type = SystemZII::CompareAndTrap; + break; + default: + return false; + } + + // See whether we have a comparison that can be fused. + unsigned FusedOpcode = + TII->getFusedCompare(Compare.getOpcode(), Type, &Compare); + if (!FusedOpcode) return false; // Make sure that the operands are available at the branch. - unsigned SrcReg = Compare->getOperand(0).getReg(); - unsigned SrcReg2 = (Compare->getOperand(1).isReg() ? - Compare->getOperand(1).getReg() : 0); + unsigned SrcReg = Compare.getOperand(0).getReg(); + unsigned SrcReg2 = + Compare.getOperand(1).isReg() ? 
Compare.getOperand(1).getReg() : 0; MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch; for (++MBBI; MBBI != MBBE; ++MBBI) if (MBBI->modifiesRegister(SrcReg, TRI) || (SrcReg2 && MBBI->modifiesRegister(SrcReg2, TRI))) return false; - // Read the branch mask and target. + // Read the branch mask, target (if applicable), regmask (if applicable). MachineOperand CCMask(MBBI->getOperand(1)); - MachineOperand Target(MBBI->getOperand(2)); assert((CCMask.getImm() & ~SystemZ::CCMASK_ICMP) == 0 && "Invalid condition-code mask for integer comparison"); + // This is only valid for CompareAndBranch. + MachineOperand Target(MBBI->getOperand( + Type == SystemZII::CompareAndBranch ? 2 : 0)); + const uint32_t *RegMask; + if (Type == SystemZII::CompareAndSibcall) + RegMask = MBBI->getOperand(2).getRegMask(); // Clear out all current operands. int CCUse = MBBI->findRegisterUseOperandIdx(SystemZ::CC, false, TRI); - assert(CCUse >= 0 && "BRC must use CC"); + assert(CCUse >= 0 && "BRC/BCR must use CC"); Branch->RemoveOperand(CCUse); - Branch->RemoveOperand(2); + // Remove target (branch) or regmask (sibcall). + if (Type == SystemZII::CompareAndBranch || + Type == SystemZII::CompareAndSibcall) + Branch->RemoveOperand(2); Branch->RemoveOperand(1); Branch->RemoveOperand(0); // Rebuild Branch as a fused compare and branch. Branch->setDesc(TII->get(FusedOpcode)); - MachineInstrBuilder(*Branch->getParent()->getParent(), Branch) - .addOperand(Compare->getOperand(0)) - .addOperand(Compare->getOperand(1)) - .addOperand(CCMask) - .addOperand(Target) - .addReg(SystemZ::CC, RegState::ImplicitDefine); + MachineInstrBuilder MIB(*Branch->getParent()->getParent(), Branch); + MIB.addOperand(Compare.getOperand(0)) + .addOperand(Compare.getOperand(1)) + .addOperand(CCMask); + + if (Type == SystemZII::CompareAndBranch) { + // Only conditional branches define CC, as they may be converted back + // to a non-fused branch because of a long displacement. Conditional + // returns don't have that problem. + MIB.addOperand(Target) + .addReg(SystemZ::CC, RegState::ImplicitDefine | RegState::Dead); + } + + if (Type == SystemZII::CompareAndSibcall) + MIB.addRegMask(RegMask); // Clear any intervening kills of SrcReg and SrcReg2. 
MBBI = Compare; @@ -445,29 +475,31 @@ bool SystemZElimCompare::processBlock(MachineBasicBlock &MBB) { SmallVector<MachineInstr *, 4> CCUsers; MachineBasicBlock::iterator MBBI = MBB.end(); while (MBBI != MBB.begin()) { - MachineInstr *MI = --MBBI; - if (CompleteCCUsers && - (MI->isCompare() || isLoadAndTestAsCmp(MI)) && + MachineInstr &MI = *--MBBI; + if (CompleteCCUsers && (MI.isCompare() || isLoadAndTestAsCmp(MI)) && (optimizeCompareZero(MI, CCUsers) || - fuseCompareAndBranch(MI, CCUsers))) { + fuseCompareOperations(MI, CCUsers))) { ++MBBI; - MI->eraseFromParent(); + MI.eraseFromParent(); Changed = true; CCUsers.clear(); continue; } - if (MI->definesRegister(SystemZ::CC)) { + if (MI.definesRegister(SystemZ::CC)) { CCUsers.clear(); CompleteCCUsers = true; } - if (MI->readsRegister(SystemZ::CC) && CompleteCCUsers) - CCUsers.push_back(MI); + if (MI.readsRegister(SystemZ::CC) && CompleteCCUsers) + CCUsers.push_back(&MI); } return Changed; } bool SystemZElimCompare::runOnMachineFunction(MachineFunction &F) { + if (skipFunction(*F.getFunction())) + return false; + TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo()); TRI = &TII->getRegisterInfo(); diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp index e1b20d0536d1..ccaed49475ca 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -81,6 +81,12 @@ void SystemZFrameLowering::determineCalleeSaves(MachineFunction &MF, for (unsigned I = MFI->getVarArgsFirstGPR(); I < SystemZ::NumArgGPRs; ++I) SavedRegs.set(SystemZ::ArgGPRs[I]); + // If there are any landing pads, entering them will modify r6/r7. + if (!MF.getMMI().getLandingPads().empty()) { + SavedRegs.set(SystemZ::R6D); + SavedRegs.set(SystemZ::R7D); + } + // If the function requires a frame pointer, record that the hard // frame pointer will be clobbered. if (HasFP) @@ -258,7 +264,8 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, // Do a second scan adding regs as being defined by instruction for (unsigned I = 0, E = CSI.size(); I != E; ++I) { unsigned Reg = CSI[I].getReg(); - if (Reg != LowGPR && Reg != HighGPR) + if (Reg != LowGPR && Reg != HighGPR && + SystemZ::GR64BitRegClass.contains(Reg)) MIB.addReg(Reg, RegState::ImplicitDefine); } } @@ -353,6 +360,15 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF, uint64_t StackSize = getAllocatedStackSize(MF); if (StackSize) { + // Determine if we want to store a backchain. + bool StoreBackchain = MF.getFunction()->hasFnAttribute("backchain"); + + // If we need backchain, save current stack pointer. R1 is free at this + // point. + if (StoreBackchain) + BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::LGR)) + .addReg(SystemZ::R1D, RegState::Define).addReg(SystemZ::R15D); + // Allocate StackSize bytes. 
int64_t Delta = -int64_t(StackSize); emitIncrement(MBB, MBBI, DL, SystemZ::R15D, Delta, ZII); @@ -363,6 +379,10 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF, BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); SPOffsetFromCFA += Delta; + + if (StoreBackchain) + BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::STG)) + .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D).addImm(0).addReg(0); } if (HasFP) { @@ -511,7 +531,7 @@ SystemZFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { return true; } -void SystemZFrameLowering:: +MachineBasicBlock::iterator SystemZFrameLowering:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { @@ -520,7 +540,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, case SystemZ::ADJCALLSTACKUP: assert(hasReservedCallFrame(MF) && "ADJSTACKDOWN and ADJSTACKUP should be no-ops"); - MBB.erase(MI); + return MBB.erase(MI); break; default: diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h index 46bb6b7a7573..d43a176ad874 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h @@ -46,10 +46,9 @@ public: int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const override; bool hasReservedCallFrame(const MachineFunction &MF) const override; - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI) const - override; + MachineBasicBlock::iterator + eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const override; // Return the number of bytes in the callee-allocated part of the frame. uint64_t getAllocatedStackSize(const MachineFunction &MF) const; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index a9093094d884..cd7fcc3070a4 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -113,7 +113,8 @@ static uint64_t allOnes(unsigned int Count) { // (and (rotl Input, Rotate), Mask) // // otherwise. The output value has BitSize bits, although Input may be -// narrower (in which case the upper bits are don't care). +// narrower (in which case the upper bits are don't care), or wider (in which +// case the result will be truncated as part of the operation). struct RxSBGOperands { RxSBGOperands(unsigned Op, SDValue N) : Opcode(Op), BitSize(N.getValueType().getSizeInBits()), @@ -279,18 +280,18 @@ class SystemZDAGToDAGISel : public SelectionDAGISel { bool expandRxSBG(RxSBGOperands &RxSBG) const; // Return an undefined value of type VT. - SDValue getUNDEF(SDLoc DL, EVT VT) const; + SDValue getUNDEF(const SDLoc &DL, EVT VT) const; // Convert N to VT, if it isn't already. - SDValue convertTo(SDLoc DL, EVT VT, SDValue N) const; + SDValue convertTo(const SDLoc &DL, EVT VT, SDValue N) const; // Try to implement AND or shift node N using RISBG with the zero flag set. // Return the selected node on success, otherwise return null. - SDNode *tryRISBGZero(SDNode *N); + bool tryRISBGZero(SDNode *N); // Try to use RISBG or Opcode to implement OR or XOR node N. // Return the selected node on success, otherwise return null. 
- SDNode *tryRxSBG(SDNode *N, unsigned Opcode); + bool tryRxSBG(SDNode *N, unsigned Opcode); // If Op0 is null, then Node is a constant that can be loaded using: // @@ -299,14 +300,14 @@ class SystemZDAGToDAGISel : public SelectionDAGISel { // If Op0 is nonnull, then Node can be implemented using: // // (Opcode (Opcode Op0 UpperVal) LowerVal) - SDNode *splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0, - uint64_t UpperVal, uint64_t LowerVal); + void splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0, + uint64_t UpperVal, uint64_t LowerVal); // Try to use gather instruction Opcode to implement vector insertion N. - SDNode *tryGather(SDNode *N, unsigned Opcode); + bool tryGather(SDNode *N, unsigned Opcode); // Try to use scatter instruction Opcode to implement store Store. - SDNode *tryScatter(StoreSDNode *Store, unsigned Opcode); + bool tryScatter(StoreSDNode *Store, unsigned Opcode); // Return true if Load and Store are loads and stores of the same size // and are guaranteed not to overlap. Such operations can be implemented @@ -343,7 +344,7 @@ public: } // Override SelectionDAGISel. - SDNode *Select(SDNode *Node) override; + void Select(SDNode *Node) override; bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) override; @@ -554,6 +555,10 @@ bool SystemZDAGToDAGISel::selectAddress(SDValue Addr, expandDisp(AM, true, SDValue(), cast<ConstantSDNode>(Addr)->getSExtValue())) ; + // Also see if it's a bare ADJDYNALLOC. + else if (Addr.getOpcode() == SystemZISD::ADJDYNALLOC && + expandAdjDynAlloc(AM, true, SDValue())) + ; else // Otherwise try expanding each component. while (expandAddress(AM, true) || @@ -741,6 +746,16 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const { SDValue N = RxSBG.Input; unsigned Opcode = N.getOpcode(); switch (Opcode) { + case ISD::TRUNCATE: { + if (RxSBG.Opcode == SystemZ::RNSBG) + return false; + uint64_t BitSize = N.getValueType().getSizeInBits(); + uint64_t Mask = allOnes(BitSize); + if (!refineRxSBGMask(RxSBG, Mask)) + return false; + RxSBG.Input = N.getOperand(0); + return true; + } case ISD::AND: { if (RxSBG.Opcode == SystemZ::RNSBG) return false; @@ -888,12 +903,13 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const { } } -SDValue SystemZDAGToDAGISel::getUNDEF(SDLoc DL, EVT VT) const { +SDValue SystemZDAGToDAGISel::getUNDEF(const SDLoc &DL, EVT VT) const { SDNode *N = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT); return SDValue(N, 0); } -SDValue SystemZDAGToDAGISel::convertTo(SDLoc DL, EVT VT, SDValue N) const { +SDValue SystemZDAGToDAGISel::convertTo(const SDLoc &DL, EVT VT, + SDValue N) const { if (N.getValueType() == MVT::i32 && VT == MVT::i64) return CurDAG->getTargetInsertSubreg(SystemZ::subreg_l32, DL, VT, getUNDEF(DL, MVT::i64), N); @@ -903,23 +919,27 @@ SDValue SystemZDAGToDAGISel::convertTo(SDLoc DL, EVT VT, SDValue N) const { return N; } -SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) { +bool SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) { SDLoc DL(N); EVT VT = N->getValueType(0); if (!VT.isInteger() || VT.getSizeInBits() > 64) - return nullptr; + return false; RxSBGOperands RISBG(SystemZ::RISBG, SDValue(N, 0)); unsigned Count = 0; while (expandRxSBG(RISBG)) - if (RISBG.Input.getOpcode() != ISD::ANY_EXTEND) + // The widening or narrowing is expected to be free. + // Counting widening or narrowing as a saved operation will result in + // preferring an R*SBG over a simple shift/logical instruction. 
+ if (RISBG.Input.getOpcode() != ISD::ANY_EXTEND && + RISBG.Input.getOpcode() != ISD::TRUNCATE) Count += 1; if (Count == 0) - return nullptr; + return false; if (Count == 1) { // Prefer to use normal shift instructions over RISBG, since they can handle // all cases and are sometimes shorter. if (N->getOpcode() != ISD::AND) - return nullptr; + return false; // Prefer register extensions like LLC over RISBG. Also prefer to start // out with normal ANDs if one instruction would be enough. We can convert @@ -934,9 +954,10 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) { if (MaskN->getZExtValue() != RISBG.Mask) { SDValue NewMask = CurDAG->getConstant(RISBG.Mask, DL, VT); N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), NewMask); - return SelectCode(N); + SelectCode(N); + return true; } - return nullptr; + return false; } } @@ -952,8 +973,11 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) { } SDValue In = convertTo(DL, VT, RISBG.Input); - N = CurDAG->getMachineNode(OpCode, DL, VT, In); - return convertTo(DL, VT, SDValue(N, 0)).getNode(); + SDValue New = convertTo( + DL, VT, SDValue(CurDAG->getMachineNode(OpCode, DL, VT, In), 0)); + ReplaceUses(N, New.getNode()); + CurDAG->RemoveDeadNode(N); + return true; } unsigned Opcode = SystemZ::RISBG; @@ -974,15 +998,18 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) { CurDAG->getTargetConstant(RISBG.End | 128, DL, MVT::i32), CurDAG->getTargetConstant(RISBG.Rotate, DL, MVT::i32) }; - N = CurDAG->getMachineNode(Opcode, DL, OpcodeVT, Ops); - return convertTo(DL, VT, SDValue(N, 0)).getNode(); + SDValue New = convertTo( + DL, VT, SDValue(CurDAG->getMachineNode(Opcode, DL, OpcodeVT, Ops), 0)); + ReplaceUses(N, New.getNode()); + CurDAG->RemoveDeadNode(N); + return true; } -SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) { +bool SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) { SDLoc DL(N); EVT VT = N->getValueType(0); if (!VT.isInteger() || VT.getSizeInBits() > 64) - return nullptr; + return false; // Try treating each operand of N as the second operand of the RxSBG // and see which goes deepest. RxSBGOperands RxSBG[] = { @@ -992,12 +1019,16 @@ SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) { unsigned Count[] = { 0, 0 }; for (unsigned I = 0; I < 2; ++I) while (expandRxSBG(RxSBG[I])) - if (RxSBG[I].Input.getOpcode() != ISD::ANY_EXTEND) + // The widening or narrowing is expected to be free. + // Counting widening or narrowing as a saved operation will result in + // preferring an R*SBG over a simple shift/logical instruction. + if (RxSBG[I].Input.getOpcode() != ISD::ANY_EXTEND && + RxSBG[I].Input.getOpcode() != ISD::TRUNCATE) Count[I] += 1; // Do nothing if neither operand is suitable. if (Count[0] == 0 && Count[1] == 0) - return nullptr; + return false; // Pick the deepest second operand. unsigned I = Count[0] > Count[1] ? 0 : 1; @@ -1007,7 +1038,7 @@ SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) { if (Opcode == SystemZ::ROSBG && (RxSBG[I].Mask & 0xff) == 0) if (auto *Load = dyn_cast<LoadSDNode>(Op0.getNode())) if (Load->getMemoryVT() == MVT::i8) - return nullptr; + return false; // See whether we can avoid an AND in the first operand by converting // ROSBG to RISBG. 
@@ -1025,47 +1056,70 @@ SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) { CurDAG->getTargetConstant(RxSBG[I].End, DL, MVT::i32), CurDAG->getTargetConstant(RxSBG[I].Rotate, DL, MVT::i32) }; - N = CurDAG->getMachineNode(Opcode, DL, MVT::i64, Ops); - return convertTo(DL, VT, SDValue(N, 0)).getNode(); + SDValue New = convertTo( + DL, VT, SDValue(CurDAG->getMachineNode(Opcode, DL, MVT::i64, Ops), 0)); + ReplaceNode(N, New.getNode()); + return true; } -SDNode *SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node, - SDValue Op0, uint64_t UpperVal, - uint64_t LowerVal) { +void SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node, + SDValue Op0, uint64_t UpperVal, + uint64_t LowerVal) { EVT VT = Node->getValueType(0); SDLoc DL(Node); SDValue Upper = CurDAG->getConstant(UpperVal, DL, VT); if (Op0.getNode()) Upper = CurDAG->getNode(Opcode, DL, VT, Op0, Upper); - Upper = SDValue(Select(Upper.getNode()), 0); + + { + // When we haven't passed in Op0, Upper will be a constant. In order to + // prevent folding back to the large immediate in `Or = getNode(...)` we run + // SelectCode first and end up with an opaque machine node. This means that + // we need to use a handle to keep track of Upper in case it gets CSE'd by + // SelectCode. + // + // Note that in the case where Op0 is passed in we could just call + // SelectCode(Upper) later, along with the SelectCode(Or), and avoid needing + // the handle at all, but it's fine to do it here. + // + // TODO: This is a pretty hacky way to do this. Can we do something that + // doesn't require a two paragraph explanation? + HandleSDNode Handle(Upper); + SelectCode(Upper.getNode()); + Upper = Handle.getValue(); + } SDValue Lower = CurDAG->getConstant(LowerVal, DL, VT); SDValue Or = CurDAG->getNode(Opcode, DL, VT, Upper, Lower); - return Or.getNode(); + + ReplaceUses(Node, Or.getNode()); + CurDAG->RemoveDeadNode(Node); + + SelectCode(Or.getNode()); } -SDNode *SystemZDAGToDAGISel::tryGather(SDNode *N, unsigned Opcode) { +bool SystemZDAGToDAGISel::tryGather(SDNode *N, unsigned Opcode) { SDValue ElemV = N->getOperand(2); auto *ElemN = dyn_cast<ConstantSDNode>(ElemV); if (!ElemN) - return 0; + return false; unsigned Elem = ElemN->getZExtValue(); EVT VT = N->getValueType(0); if (Elem >= VT.getVectorNumElements()) - return 0; + return false; auto *Load = dyn_cast<LoadSDNode>(N->getOperand(1)); if (!Load || !Load->hasOneUse()) - return 0; + return false; if (Load->getMemoryVT().getSizeInBits() != Load->getValueType(0).getSizeInBits()) - return 0; + return false; SDValue Base, Disp, Index; if (!selectBDVAddr12Only(Load->getBasePtr(), ElemV, Base, Disp, Index) || Index.getValueType() != VT.changeVectorElementTypeToInteger()) - return 0; + return false; SDLoc DL(Load); SDValue Ops[] = { @@ -1074,39 +1128,41 @@ SDNode *SystemZDAGToDAGISel::tryGather(SDNode *N, unsigned Opcode) { }; SDNode *Res = CurDAG->getMachineNode(Opcode, DL, VT, MVT::Other, Ops); ReplaceUses(SDValue(Load, 1), SDValue(Res, 1)); - return Res; + ReplaceNode(N, Res); + return true; } -SDNode *SystemZDAGToDAGISel::tryScatter(StoreSDNode *Store, unsigned Opcode) { +bool SystemZDAGToDAGISel::tryScatter(StoreSDNode *Store, unsigned Opcode) { SDValue Value = Store->getValue(); if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT) - return 0; + return false; if (Store->getMemoryVT().getSizeInBits() != Value.getValueType().getSizeInBits()) - return 0; + return false; SDValue ElemV = Value.getOperand(1); auto *ElemN = dyn_cast<ConstantSDNode>(ElemV); if (!ElemN) - 
return 0; + return false; SDValue Vec = Value.getOperand(0); EVT VT = Vec.getValueType(); unsigned Elem = ElemN->getZExtValue(); if (Elem >= VT.getVectorNumElements()) - return 0; + return false; SDValue Base, Disp, Index; if (!selectBDVAddr12Only(Store->getBasePtr(), ElemV, Base, Disp, Index) || Index.getValueType() != VT.changeVectorElementTypeToInteger()) - return 0; + return false; SDLoc DL(Store); SDValue Ops[] = { Vec, Base, Disp, Index, CurDAG->getTargetConstant(Elem, DL, MVT::i32), Store->getChain() }; - return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops); + ReplaceNode(Store, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops)); + return true; } bool SystemZDAGToDAGISel::canUseBlockOperation(StoreSDNode *Store, @@ -1167,7 +1223,7 @@ bool SystemZDAGToDAGISel::storeLoadCanUseBlockBinary(SDNode *N, return !LoadA->isVolatile() && canUseBlockOperation(StoreA, LoadB); } -SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { +void SystemZDAGToDAGISel::Select(SDNode *Node) { // Dump information about the Node being selected DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n"); @@ -1175,43 +1231,47 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { if (Node->isMachineOpcode()) { DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); Node->setNodeId(-1); - return nullptr; + return; } unsigned Opcode = Node->getOpcode(); - SDNode *ResNode = nullptr; switch (Opcode) { case ISD::OR: if (Node->getOperand(1).getOpcode() != ISD::Constant) - ResNode = tryRxSBG(Node, SystemZ::ROSBG); + if (tryRxSBG(Node, SystemZ::ROSBG)) + return; goto or_xor; case ISD::XOR: if (Node->getOperand(1).getOpcode() != ISD::Constant) - ResNode = tryRxSBG(Node, SystemZ::RXSBG); + if (tryRxSBG(Node, SystemZ::RXSBG)) + return; // Fall through. or_xor: // If this is a 64-bit operation in which both 32-bit halves are nonzero, // split the operation into two. - if (!ResNode && Node->getValueType(0) == MVT::i64) + if (Node->getValueType(0) == MVT::i64) if (auto *Op1 = dyn_cast<ConstantSDNode>(Node->getOperand(1))) { uint64_t Val = Op1->getZExtValue(); - if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val)) - Node = splitLargeImmediate(Opcode, Node, Node->getOperand(0), - Val - uint32_t(Val), uint32_t(Val)); + if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val)) { + splitLargeImmediate(Opcode, Node, Node->getOperand(0), + Val - uint32_t(Val), uint32_t(Val)); + return; + } } break; case ISD::AND: if (Node->getOperand(1).getOpcode() != ISD::Constant) - ResNode = tryRxSBG(Node, SystemZ::RNSBG); + if (tryRxSBG(Node, SystemZ::RNSBG)) + return; // Fall through. case ISD::ROTL: case ISD::SHL: case ISD::SRL: case ISD::ZERO_EXTEND: - if (!ResNode) - ResNode = tryRISBGZero(Node); + if (tryRISBGZero(Node)) + return; break; case ISD::Constant: @@ -1219,9 +1279,11 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { // LLIHF and LGFI, split it into two 32-bit pieces. 
if (Node->getValueType(0) == MVT::i64) { uint64_t Val = cast<ConstantSDNode>(Node)->getZExtValue(); - if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val) && !isInt<32>(Val)) - Node = splitLargeImmediate(ISD::OR, Node, SDValue(), - Val - uint32_t(Val), uint32_t(Val)); + if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val) && !isInt<32>(Val)) { + splitLargeImmediate(ISD::OR, Node, SDValue(), Val - uint32_t(Val), + uint32_t(Val)); + return; + } } break; @@ -1249,63 +1311,75 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { case ISD::INSERT_VECTOR_ELT: { EVT VT = Node->getValueType(0); unsigned ElemBitSize = VT.getVectorElementType().getSizeInBits(); - if (ElemBitSize == 32) - ResNode = tryGather(Node, SystemZ::VGEF); - else if (ElemBitSize == 64) - ResNode = tryGather(Node, SystemZ::VGEG); + if (ElemBitSize == 32) { + if (tryGather(Node, SystemZ::VGEF)) + return; + } else if (ElemBitSize == 64) { + if (tryGather(Node, SystemZ::VGEG)) + return; + } break; } case ISD::STORE: { auto *Store = cast<StoreSDNode>(Node); unsigned ElemBitSize = Store->getValue().getValueType().getSizeInBits(); - if (ElemBitSize == 32) - ResNode = tryScatter(Store, SystemZ::VSCEF); - else if (ElemBitSize == 64) - ResNode = tryScatter(Store, SystemZ::VSCEG); + if (ElemBitSize == 32) { + if (tryScatter(Store, SystemZ::VSCEF)) + return; + } else if (ElemBitSize == 64) { + if (tryScatter(Store, SystemZ::VSCEG)) + return; + } break; } } - // Select the default instruction - if (!ResNode) - ResNode = SelectCode(Node); - - DEBUG(errs() << "=> "; - if (ResNode == nullptr || ResNode == Node) - Node->dump(CurDAG); - else - ResNode->dump(CurDAG); - errs() << "\n"; - ); - return ResNode; + SelectCode(Node); } bool SystemZDAGToDAGISel:: SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { + SystemZAddressingMode::AddrForm Form; + SystemZAddressingMode::DispRange DispRange; + SDValue Base, Disp, Index; + switch(ConstraintID) { default: llvm_unreachable("Unexpected asm memory constraint"); case InlineAsm::Constraint_i: - case InlineAsm::Constraint_m: case InlineAsm::Constraint_Q: + // Accept an address with a short displacement, but no index. + Form = SystemZAddressingMode::FormBD; + DispRange = SystemZAddressingMode::Disp12Only; + break; case InlineAsm::Constraint_R: + // Accept an address with a short displacement and an index. + Form = SystemZAddressingMode::FormBDXNormal; + DispRange = SystemZAddressingMode::Disp12Only; + break; case InlineAsm::Constraint_S: + // Accept an address with a long displacement, but no index. + Form = SystemZAddressingMode::FormBD; + DispRange = SystemZAddressingMode::Disp20Only; + break; case InlineAsm::Constraint_T: - // Accept addresses with short displacements, which are compatible - // with Q, R, S and T. But keep the index operand for future expansion. - SDValue Base, Disp, Index; - if (selectBDXAddr(SystemZAddressingMode::FormBD, - SystemZAddressingMode::Disp12Only, - Op, Base, Disp, Index)) { - OutOps.push_back(Base); - OutOps.push_back(Disp); - OutOps.push_back(Index); - return false; - } + case InlineAsm::Constraint_m: + // Accept an address with a long displacement and an index. + // m works the same as T, as this is the most general case. 
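A user-level illustration of the constraint mapping (a sketch, assuming a SystemZ cross-compiler such as clang --target=s390x-linux-gnu; the letters are the documented SystemZ memory constraints):

// Q: base + 12-bit unsigned displacement, no index
// R: base + 12-bit unsigned displacement, with index
// S: base + 20-bit signed displacement, no index
// T: base + 20-bit signed displacement, with index ("m" now maps here too)
long load64(long *p) {
  long v;
  asm("lg %0, %1" : "=r"(v) : "T"(*p)); // LG accepts the full D(X,B) form
  return v;
}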
+ Form = SystemZAddressingMode::FormBDXNormal; + DispRange = SystemZAddressingMode::Disp20Only; break; } + + if (selectBDXAddr(Form, DispRange, Op, Base, Disp, Index)) { + OutOps.push_back(Base); + OutOps.push_back(Disp); + OutOps.push_back(Index); + return false; + } + return true; } diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index b0a612764636..14991bbbd365 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -184,8 +184,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, // No special instructions for these. setOperationAction(ISD::CTTZ, VT, Expand); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::ROTR, VT, Expand); // Use *MUL_LOHI where possible instead of MULH*. @@ -216,6 +214,11 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + + // Traps are legal, as we will convert them to "j .+2". + setOperationAction(ISD::TRAP, MVT::Other, Legal); + // z10 has instructions for signed but not unsigned FP conversion. // Handle unsigned 32-bit types as signed 64-bit types. if (!Subtarget.hasFPExtension()) { @@ -253,6 +256,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, // We need to handle dynamic allocations specially because of the // 160-byte area at the bottom of the stack. setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom); + setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom); // Use custom expanders so that we can force the function to use // a frame pointer. @@ -310,8 +314,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::CTPOP, VT, Custom); setOperationAction(ISD::CTTZ, VT, Legal); setOperationAction(ISD::CTLZ, VT, Legal); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom); // Convert a GPR scalar to a vector by inserting it into element 0. setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); @@ -437,6 +439,11 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); setTargetDAGCombine(ISD::FP_ROUND); + setTargetDAGCombine(ISD::BSWAP); + setTargetDAGCombine(ISD::SHL); + setTargetDAGCombine(ISD::SRA); + setTargetDAGCombine(ISD::SRL); + setTargetDAGCombine(ISD::ROTL); // Handle intrinsics. setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); @@ -799,7 +806,7 @@ static void VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> &Outs) { // Value is a value that has been passed to us in the location described by VA // (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining // any loads onto Chain. 
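A hedged aside on the TRAP change registered above: "j .+2" branches into its own second halfword, whose bytes (the branch's offset field) do not decode as a valid instruction, so execution faults immediately; a compact trap with no dedicated trap opcode. From the source side:

[[noreturn]] void die() { __builtin_trap(); } // selected as: j .+2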
-static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDLoc DL, +static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL, CCValAssign &VA, SDValue Chain, SDValue Value) { // If the argument has been promoted from a smaller type, insert an @@ -813,16 +820,12 @@ static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDLoc DL, if (VA.isExtInLoc()) Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value); - else if (VA.getLocInfo() == CCValAssign::Indirect) - Value = DAG.getLoad(VA.getValVT(), DL, Chain, Value, - MachinePointerInfo(), false, false, false, 0); else if (VA.getLocInfo() == CCValAssign::BCvt) { // If this is a short vector argument loaded from the stack, // extend from i64 to full vector size and then bitcast. assert(VA.getLocVT() == MVT::i64); assert(VA.getValVT().isVector()); - Value = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2i64, - Value, DAG.getUNDEF(MVT::i64)); + Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)}); Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value); } else assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo"); @@ -832,7 +835,7 @@ static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDLoc DL, // Value is a value of type VA.getValVT() that we need to copy into // the location described by VA. Return a copy of Value converted to // VA.getValVT(). The caller is responsible for handling indirect values. -static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDLoc DL, +static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL, CCValAssign &VA, SDValue Value) { switch (VA.getLocInfo()) { case CCValAssign::SExt: @@ -856,11 +859,10 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDLoc DL, } } -SDValue SystemZTargetLowering:: -LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, - SDLoc DL, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const { +SDValue SystemZTargetLowering::LowerFormalArguments( + SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, + SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -868,6 +870,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, MF.getInfo<SystemZMachineFunctionInfo>(); auto *TFL = static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering()); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); // Detect unsupported vector argument types. if (Subtarget.hasVector()) @@ -930,19 +933,34 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, // Create the SelectionDAG nodes corresponding to a load // from this parameter. Unpromoted ints and floats are // passed as right-justified 8-byte values. - EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(4, DL)); ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN, - MachinePointerInfo::getFixedStack(MF, FI), false, - false, false, 0); + MachinePointerInfo::getFixedStack(MF, FI)); } // Convert the value of the argument register into the value that's // being passed. 
- InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue)); + if (VA.getLocInfo() == CCValAssign::Indirect) { + InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue, + MachinePointerInfo())); + // If the original argument was split (e.g. i128), we need + // to load all parts of it here (using the same address). + unsigned ArgIndex = Ins[I].OrigArgIndex; + assert (Ins[I].PartOffset == 0); + while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) { + CCValAssign &PartVA = ArgLocs[I + 1]; + unsigned PartOffset = Ins[I + 1].PartOffset; + SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, + DAG.getIntPtrConstant(PartOffset, DL)); + InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address, + MachinePointerInfo())); + ++I; + } + } else + InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue)); } if (IsVarArg) { @@ -973,8 +991,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, &SystemZ::FP64BitRegClass); SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64); MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN, - MachinePointerInfo::getFixedStack(MF, FI), - false, false, 0); + MachinePointerInfo::getFixedStack(MF, FI)); } // Join the stores, which are independent of one another. Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, @@ -987,9 +1004,11 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, } static bool canUseSiblingCall(const CCState &ArgCCInfo, - SmallVectorImpl<CCValAssign> &ArgLocs) { + SmallVectorImpl<CCValAssign> &ArgLocs, + SmallVectorImpl<ISD::OutputArg> &Outs) { // Punt if there are any indirect or stack arguments, or if the call - // needs the call-saved argument register R6. + // needs the callee-saved argument register R6, or if the call uses + // the callee-saved register arguments SwiftSelf and SwiftError. for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { CCValAssign &VA = ArgLocs[I]; if (VA.getLocInfo() == CCValAssign::Indirect) @@ -999,6 +1018,8 @@ static bool canUseSiblingCall(const CCState &ArgCCInfo, unsigned Reg = VA.getLocReg(); if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D) return false; + if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError()) + return false; } return true; } @@ -1032,7 +1053,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, // We don't support GuaranteedTailCallOpt, only automatically-detected // sibling calls. - if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs)) + if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs)) IsTailCall = false; // Get a count of how many bytes are to be pushed on the stack. @@ -1054,11 +1075,25 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, if (VA.getLocInfo() == CCValAssign::Indirect) { // Store the argument in a stack slot and pass its address. - SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT()); + SDValue SpillSlot = DAG.CreateStackTemporary(Outs[I].ArgVT); int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); - MemOpChains.push_back(DAG.getStore( - Chain, DL, ArgValue, SpillSlot, - MachinePointerInfo::getFixedStack(MF, FI), false, false, 0)); + MemOpChains.push_back( + DAG.getStore(Chain, DL, ArgValue, SpillSlot, + MachinePointerInfo::getFixedStack(MF, FI))); + // If the original argument was split (e.g. i128), we need + // to store all parts of it here (and pass just one address). 
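A sketch of what the part loops achieve at the ABI level (assuming the usual s390x convention that i128 is passed indirectly): the legalizer splits an __int128 into two i64 parts sharing one OrigArgIndex; the caller stores part 0 at offset 0 and part 1 at offset 8 of a single spill slot and passes only the slot's address, while the callee-side loop reloads every part from that one pointer.

__int128 add128(__int128 a, __int128 b) { return a + b; } // exercises both sides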
+ unsigned ArgIndex = Outs[I].OrigArgIndex; + assert (Outs[I].PartOffset == 0); + while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) { + SDValue PartValue = OutVals[I + 1]; + unsigned PartOffset = Outs[I + 1].PartOffset; + SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, + DAG.getIntPtrConstant(PartOffset, DL)); + MemOpChains.push_back( + DAG.getStore(Chain, DL, PartValue, Address, + MachinePointerInfo::getFixedStack(MF, FI))); + ++I; + } ArgValue = SpillSlot; } else ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue); @@ -1080,9 +1115,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, DAG.getIntPtrConstant(Offset, DL)); // Emit the store. - MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, Address, - MachinePointerInfo(), - false, false, 0)); + MemOpChains.push_back( + DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); } } @@ -1180,17 +1214,23 @@ CanLowerReturn(CallingConv::ID CallConv, if (Subtarget.hasVector()) VerifyVectorTypes(Outs); + // Special case that we cannot easily detect in RetCC_SystemZ since + // i128 is not a legal type. + for (auto &Out : Outs) + if (Out.ArgVT == MVT::i128) + return false; + SmallVector<CCValAssign, 16> RetLocs; CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context); return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ); } SDValue -SystemZTargetLowering::LowerReturn(SDValue Chain, - CallingConv::ID CallConv, bool IsVarArg, +SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, + bool IsVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, - SDLoc DL, SelectionDAG &DAG) const { + const SDLoc &DL, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); // Detect unsupported vector return types. @@ -1235,8 +1275,8 @@ SystemZTargetLowering::LowerReturn(SDValue Chain, return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps); } -SDValue SystemZTargetLowering:: -prepareVolatileOrAtomicLoad(SDValue Chain, SDLoc DL, SelectionDAG &DAG) const { +SDValue SystemZTargetLowering::prepareVolatileOrAtomicLoad( + SDValue Chain, const SDLoc &DL, SelectionDAG &DAG) const { return DAG.getNode(SystemZISD::SERIALIZE, DL, MVT::Other, Chain); } @@ -1399,6 +1439,11 @@ static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) { CCValid = SystemZ::CCMASK_VCMP; return true; + case Intrinsic::s390_tdc: + Opcode = SystemZISD::TDC; + CCValid = SystemZ::CCMASK_TDC; + return true; + default: return false; } @@ -1538,7 +1583,7 @@ static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask) { // If C can be converted to a comparison against zero, adjust the operands // as necessary. -static void adjustZeroCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) { +static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { if (C.ICmpType == SystemZICMP::UnsignedOnly) return; @@ -1558,7 +1603,8 @@ static void adjustZeroCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) { // If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI, // adjust the operands as necessary. -static void adjustSubwordCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) { +static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL, + Comparison &C) { // For us to make any changes, it must a comparison between a single-use // load and a constant. 
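A minimal source pattern that adjustSubwordCmp targets (hedged; the final instruction depends on what selection picks):

// A single-use i8 load compared against a constant can fold the load
// into CLI (compare logical immediate with a byte in storage):
bool is_magic(const unsigned char *p) {
  return *p == 0x7f; // candidate for: cli 0(%rX),127
}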
if (!C.Op0.hasOneUse() || @@ -1614,11 +1660,10 @@ static void adjustSubwordCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) { ISD::ZEXTLOAD); if (C.Op0.getValueType() != MVT::i32 || Load->getExtensionType() != ExtType) - C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, - Load->getChain(), Load->getBasePtr(), - Load->getPointerInfo(), Load->getMemoryVT(), - Load->isVolatile(), Load->isNonTemporal(), - Load->isInvariant(), Load->getAlignment()); + C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(), + Load->getBasePtr(), Load->getPointerInfo(), + Load->getMemoryVT(), Load->getAlignment(), + Load->getMemOperand()->getFlags()); // Make sure that the second operand is an i32 with the right value. if (C.Op1.getValueType() != MVT::i32 || @@ -1719,7 +1764,8 @@ static unsigned reverseCCMask(unsigned CCMask) { // Check whether C tests for equality between X and Y and whether X - Y // or Y - X is also computed. In that case it's better to compare the // result of the subtraction against zero. -static void adjustForSubtraction(SelectionDAG &DAG, SDLoc DL, Comparison &C) { +static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL, + Comparison &C) { if (C.CCMask == SystemZ::CCMASK_CMP_EQ || C.CCMask == SystemZ::CCMASK_CMP_NE) { for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) { @@ -1784,7 +1830,8 @@ static void adjustForLTGFR(Comparison &C) { // If C compares the truncation of an extending load, try to compare // the untruncated value instead. This exposes more opportunities to // reuse CC. -static void adjustICmpTruncate(SelectionDAG &DAG, SDLoc DL, Comparison &C) { +static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL, + Comparison &C) { if (C.Op0.getOpcode() == ISD::TRUNCATE && C.Op0.getOperand(0).getOpcode() == ISD::LOAD && C.Op1.getOpcode() == ISD::Constant && @@ -1915,7 +1962,8 @@ static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, // See whether C can be implemented as a TEST UNDER MASK instruction. // Update the arguments with the TM version if so. -static void adjustForTestUnderMask(SelectionDAG &DAG, SDLoc DL, Comparison &C) { +static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, + Comparison &C) { // Check that we have a comparison with a constant. auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1); if (!ConstOp1) @@ -2036,7 +2084,7 @@ static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, // Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1. static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, - ISD::CondCode Cond, SDLoc DL) { + ISD::CondCode Cond, const SDLoc &DL) { if (CmpOp1.getOpcode() == ISD::Constant) { uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue(); unsigned Opcode, CCValid; @@ -2089,7 +2137,7 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, } // Emit the comparison instruction described by C. -static SDValue emitCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) { +static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { if (!C.Op1.getNode()) { SDValue Op; switch (C.Op0.getOpcode()) { @@ -2119,9 +2167,9 @@ static SDValue emitCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) { // Implement a 32-bit *MUL_LOHI operation by extending both operands to // 64 bits. Extend is the extension type to use. Store the high part // in Hi and the low part in Lo. 
-static void lowerMUL_LOHI32(SelectionDAG &DAG, SDLoc DL, - unsigned Extend, SDValue Op0, SDValue Op1, - SDValue &Hi, SDValue &Lo) { +static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend, + SDValue Op0, SDValue Op1, SDValue &Hi, + SDValue &Lo) { Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0); Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1); SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1); @@ -2136,10 +2184,9 @@ static void lowerMUL_LOHI32(SelectionDAG &DAG, SDLoc DL, // Extend extends Op0 to a GR128, and Opcode performs the GR128 operation // on the extended Op0 and (unextended) Op1. Store the even register result // in Even and the odd register result in Odd. -static void lowerGR128Binary(SelectionDAG &DAG, SDLoc DL, EVT VT, - unsigned Extend, unsigned Opcode, - SDValue Op0, SDValue Op1, - SDValue &Even, SDValue &Odd) { +static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, + unsigned Extend, unsigned Opcode, SDValue Op0, + SDValue Op1, SDValue &Even, SDValue &Odd) { SDNode *In128 = DAG.getMachineNode(Extend, DL, MVT::Untyped, Op0); SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, SDValue(In128, 0), Op1); @@ -2151,7 +2198,7 @@ static void lowerGR128Binary(SelectionDAG &DAG, SDLoc DL, EVT VT, // Return an i32 value that is 1 if the CC value produced by Glue is // in the mask CCMask and 0 otherwise. CC is known to have a value // in CCValid, so other values can be ignored. -static SDValue emitSETCC(SelectionDAG &DAG, SDLoc DL, SDValue Glue, +static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue Glue, unsigned CCValid, unsigned CCMask) { IPMConversion Conversion = getIPMConversion(CCValid, CCMask); SDValue Result = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue); @@ -2220,7 +2267,7 @@ static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP, // Return a v2f64 that contains the extended form of elements Start and Start+1 // of v4f32 value Op. -static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, SDLoc DL, +static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL, SDValue Op) { int Mask[] = { Start, -1, Start + 1, -1 }; Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask); @@ -2229,7 +2276,7 @@ static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, SDLoc DL, // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode, // producing a result of type VT. -static SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, SDLoc DL, +static SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &DL, EVT VT, SDValue CmpOp0, SDValue CmpOp1) { // There is no hardware support for v4f32, so extend the vector into // two v2f64s and compare those. @@ -2247,7 +2294,7 @@ static SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, SDLoc DL, // Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing // an integer mask of type VT. -static SDValue lowerVectorSETCC(SelectionDAG &DAG, SDLoc DL, EVT VT, +static SDValue lowerVectorSETCC(SelectionDAG &DAG, const SDLoc &DL, EVT VT, ISD::CondCode CC, SDValue CmpOp0, SDValue CmpOp1) { bool IsFP = CmpOp0.getValueType().isFloatingPoint(); @@ -2342,7 +2389,7 @@ static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) { } // Return the absolute or negative absolute of Op; IsNegative decides which. 
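The 32-bit *MUL_LOHI expansion is ordinary widening arithmetic; a self-contained model of the unsigned case:

#include <cstdint>
#include <cstdio>

static void umul_lohi32(uint32_t a, uint32_t b, uint32_t &hi, uint32_t &lo) {
  uint64_t prod = uint64_t(a) * uint64_t(b); // zero-extend both, one MUL
  hi = uint32_t(prod >> 32);                 // the Hi result
  lo = uint32_t(prod);                       // the Lo result
}

int main() {
  uint32_t hi, lo;
  umul_lohi32(0xffffffffu, 0xffffffffu, hi, lo);
  printf("%#x %#x\n", hi, lo); // prints 0xfffffffe 0x1
  return 0;
}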
-static SDValue getAbsolute(SelectionDAG &DAG, SDLoc DL, SDValue Op, +static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op, bool IsNegative) { Op = DAG.getNode(SystemZISD::IABS, DL, Op.getValueType(), Op); if (IsNegative) @@ -2414,11 +2461,10 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node, const GlobalValue *GV = Node->getGlobal(); int64_t Offset = Node->getOffset(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); - Reloc::Model RM = DAG.getTarget().getRelocationModel(); CodeModel::Model CM = DAG.getTarget().getCodeModel(); SDValue Result; - if (Subtarget.isPC32DBLSymbol(GV, RM, CM)) { + if (Subtarget.isPC32DBLSymbol(GV, CM)) { // Assign anchors at 1<<12 byte boundaries. uint64_t Anchor = Offset & ~uint64_t(0xfff); Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor); @@ -2435,8 +2481,7 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node, Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT); Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result, - MachinePointerInfo::getGOT(DAG.getMachineFunction()), - false, false, false, 0); + MachinePointerInfo::getGOT(DAG.getMachineFunction())); } // If there was a non-zero offset that we didn't fold, create an explicit @@ -2495,14 +2540,9 @@ SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node, return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue); } -SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, - SelectionDAG &DAG) const { - if (DAG.getTarget().Options.EmulatedTLS) - return LowerToTLSEmulatedModel(Node, DAG); - SDLoc DL(Node); - const GlobalValue *GV = Node->getGlobal(); +SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL, + SelectionDAG &DAG) const { EVT PtrVT = getPointerTy(DAG.getDataLayout()); - TLSModel::Model model = DAG.getTarget().getTLSModel(GV); // The high part of the thread pointer is in access register 0. SDValue TPHi = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32, @@ -2517,7 +2557,19 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, // Merge them into a single 64-bit address. SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi, DAG.getConstant(32, DL, PtrVT)); - SDValue TP = DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo); + return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo); +} + +SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, + SelectionDAG &DAG) const { + if (DAG.getTarget().Options.EmulatedTLS) + return LowerToTLSEmulatedModel(Node, DAG); + SDLoc DL(Node); + const GlobalValue *GV = Node->getGlobal(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + TLSModel::Model model = DAG.getTarget().getTLSModel(GV); + + SDValue TP = lowerThreadPointer(DL, DAG); // Get the offset of GA from the thread pointer, based on the TLS model. SDValue Offset; @@ -2530,8 +2582,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, Offset = DAG.getConstantPool(CPV, PtrVT, 8); Offset = DAG.getLoad( PtrVT, DL, DAG.getEntryNode(), Offset, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, - false, false, 0); + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); // Call __tls_get_offset to retrieve the offset. 
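Numerically, the new lowerThreadPointer helper is just the following (per the comments above, access register 0 holds the high half of the thread pointer and access register 1 the low half):

#include <cstdint>

static uint64_t threadPointer(uint32_t a0, uint32_t a1) {
  return (uint64_t(a0) << 32) | a1; // the SHL-by-32 and OR emitted above
}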
Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset); @@ -2546,8 +2597,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, Offset = DAG.getConstantPool(CPV, PtrVT, 8); Offset = DAG.getLoad( PtrVT, DL, DAG.getEntryNode(), Offset, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, - false, false, 0); + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); // Call __tls_get_offset to retrieve the module base offset. Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset); @@ -2565,8 +2615,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8); DTPOffset = DAG.getLoad( PtrVT, DL, DAG.getEntryNode(), DTPOffset, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, - false, false, 0); + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset); break; @@ -2577,9 +2626,9 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_INDNTPOFF); Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset); - Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset, - MachinePointerInfo::getGOT(DAG.getMachineFunction()), - false, false, false, 0); + Offset = + DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset, + MachinePointerInfo::getGOT(DAG.getMachineFunction())); break; } @@ -2591,8 +2640,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, Offset = DAG.getConstantPool(CPV, PtrVT, 8); Offset = DAG.getLoad( PtrVT, DL, DAG.getEntryNode(), Offset, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, - false, false, 0); + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); break; } } @@ -2640,6 +2688,57 @@ SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP, return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); } +SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op, + SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MFI->setFrameAddressIsTaken(true); + + SDLoc DL(Op); + unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + + // If the back chain frame index has not been allocated yet, do so. + SystemZMachineFunctionInfo *FI = MF.getInfo<SystemZMachineFunctionInfo>(); + int BackChainIdx = FI->getFramePointerSaveIndex(); + if (!BackChainIdx) { + // By definition, the frame address is the address of the back chain. + BackChainIdx = MFI->CreateFixedObject(8, -SystemZMC::CallFrameSize, false); + FI->setFramePointerSaveIndex(BackChainIdx); + } + SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT); + + // FIXME The frontend should detect this case. 
+ if (Depth > 0) { + report_fatal_error("Unsupported stack frame traversal count"); + } + + return BackChain; +} + +SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op, + SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MFI->setReturnAddressIsTaken(true); + + if (verifyReturnAddressArgumentIsConstant(Op, DAG)) + return SDValue(); + + SDLoc DL(Op); + unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + + // FIXME The frontend should detect this case. + if (Depth > 0) { + report_fatal_error("Unsupported stack frame traversal count"); + } + + // Return R14D, which has the return address. Mark it an implicit live-in. + unsigned LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass); + return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT); +} + SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); @@ -2715,8 +2814,7 @@ SDValue SystemZTargetLowering::lowerVASTART(SDValue Op, FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr, DAG.getIntPtrConstant(Offset, DL)); MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr, - MachinePointerInfo(SV, Offset), - false, false, 0); + MachinePointerInfo(SV, Offset)); Offset += 8; } return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps); @@ -2740,8 +2838,9 @@ SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op, SDValue SystemZTargetLowering:: lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); - bool RealignOpt = !DAG.getMachineFunction().getFunction()-> - hasFnAttribute("no-realign-stack"); + MachineFunction &MF = DAG.getMachineFunction(); + bool RealignOpt = !MF.getFunction()-> hasFnAttribute("no-realign-stack"); + bool StoreBackchain = MF.getFunction()->hasFnAttribute("backchain"); SDValue Chain = Op.getOperand(0); SDValue Size = Op.getOperand(1); @@ -2763,10 +2862,15 @@ lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { // Get a reference to the stack pointer. SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64); + // If we need a backchain, save it now. + SDValue Backchain; + if (StoreBackchain) + Backchain = DAG.getLoad(MVT::i64, DL, Chain, OldSP, MachinePointerInfo()); + // Add extra space for alignment if needed. if (ExtraAlignSpace) NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace, - DAG.getConstant(ExtraAlignSpace, DL, MVT::i64)); + DAG.getConstant(ExtraAlignSpace, DL, MVT::i64)); // Get the new stack pointer value. 
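A hedged source-level view of the back-chain plumbing (only meaningful when the function carries the "backchain" attribute, e.g. is built with -mbackchain): the frame address is by definition the address of the back-chain slot, and that slot holds the caller's stack pointer, so one level of manual unwinding looks like:

void *caller_sp() {
  void **backchain = (void **)__builtin_frame_address(0); // Depth 0 only
  return *backchain; // caller's %r15; deeper Depths hit report_fatal_error
}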
SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace); @@ -2790,10 +2894,20 @@ lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64)); } + if (StoreBackchain) + Chain = DAG.getStore(Chain, DL, Backchain, NewSP, MachinePointerInfo()); + SDValue Ops[2] = { Result, Chain }; return DAG.getMergeValues(Ops, DL); } +SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET( + SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + + return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64); +} + SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); @@ -3031,6 +3145,27 @@ SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op, return Op; } +SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>( + cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()); + SynchronizationScope FenceScope = static_cast<SynchronizationScope>( + cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue()); + + // The only fence that needs an instruction is a sequentially-consistent + // cross-thread fence. + if (FenceOrdering == AtomicOrdering::SequentiallyConsistent && + FenceScope == CrossThread) { + return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other, + Op.getOperand(0)), + 0); + } + + // MEMBARRIER is a compiler barrier; it codegens to a no-op. + return DAG.getNode(SystemZISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0)); +} + // Op is an atomic load. Lower it into a normal volatile load. SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const { @@ -3220,8 +3355,24 @@ SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true); - return DAG.getCopyToReg(Op.getOperand(0), SDLoc(Op), - SystemZ::R15D, Op.getOperand(1)); + bool StoreBackchain = MF.getFunction()->hasFnAttribute("backchain"); + + SDValue Chain = Op.getOperand(0); + SDValue NewSP = Op.getOperand(1); + SDValue Backchain; + SDLoc DL(Op); + + if (StoreBackchain) { + SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, MVT::i64); + Backchain = DAG.getLoad(MVT::i64, DL, Chain, OldSP, MachinePointerInfo()); + } + + Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R15D, NewSP); + + if (StoreBackchain) + Chain = DAG.getStore(Chain, DL, Backchain, NewSP, MachinePointerInfo()); + + return Chain; } SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op, @@ -3286,6 +3437,9 @@ SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); switch (Id) { + case Intrinsic::thread_pointer: + return lowerThreadPointer(SDLoc(Op), DAG); + case Intrinsic::s390_vpdi: return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); @@ -3553,7 +3707,7 @@ static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes, // Create a node that performs P on operands Op0 and Op1, casting the // operands to the appropriate type. The type of the result is determined by P. -static SDValue getPermuteNode(SelectionDAG &DAG, SDLoc DL, +static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL, const Permute &P, SDValue Op0, SDValue Op1) { // VPDI (PERMUTE_DWORDS) always operates on v2i64s. 
The input // elements of a PACK are twice as wide as the outputs. @@ -3582,7 +3736,8 @@ static SDValue getPermuteNode(SelectionDAG &DAG, SDLoc DL, // Bytes is a VPERM-like permute vector, except that -1 is used for // undefined bytes. Implement it on operands Ops[0] and Ops[1] using // VSLDI or VPERM. -static SDValue getGeneralPermuteNode(SelectionDAG &DAG, SDLoc DL, SDValue *Ops, +static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, + SDValue *Ops, const SmallVectorImpl<int> &Bytes) { for (unsigned I = 0; I < 2; ++I) Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]); @@ -3600,7 +3755,7 @@ static SDValue getGeneralPermuteNode(SelectionDAG &DAG, SDLoc DL, SDValue *Ops, IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32); else IndexNodes[I] = DAG.getUNDEF(MVT::i32); - SDValue Op2 = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, IndexNodes); + SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes); return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2); } @@ -3610,7 +3765,7 @@ struct GeneralShuffle { GeneralShuffle(EVT vt) : VT(vt) {} void addUndef(); void add(SDValue, unsigned); - SDValue getNode(SelectionDAG &, SDLoc); + SDValue getNode(SelectionDAG &, const SDLoc &); // The operands of the shuffle. SmallVector<SDValue, SystemZ::VectorBytes> Ops; @@ -3667,7 +3822,7 @@ void GeneralShuffle::add(SDValue Op, unsigned Elem) { } Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes); Byte = unsigned(NewByte) % SystemZ::VectorBytes; - } else if (Op.getOpcode() == ISD::UNDEF) { + } else if (Op.isUndef()) { addUndef(); return; } else @@ -3689,7 +3844,7 @@ void GeneralShuffle::add(SDValue Op, unsigned Elem) { } // Return SDNodes for the completed shuffle. -SDValue GeneralShuffle::getNode(SelectionDAG &DAG, SDLoc DL) { +SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) { assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector"); if (Ops.size() == 0) @@ -3770,37 +3925,37 @@ SDValue GeneralShuffle::getNode(SelectionDAG &DAG, SDLoc DL) { // Return true if the given BUILD_VECTOR is a scalar-to-vector conversion. static bool isScalarToVector(SDValue Op) { for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I) - if (Op.getOperand(I).getOpcode() != ISD::UNDEF) + if (!Op.getOperand(I).isUndef()) return false; return true; } // Return a vector of type VT that contains Value in the first element. // The other elements don't matter. -static SDValue buildScalarToVector(SelectionDAG &DAG, SDLoc DL, EVT VT, +static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Value) { // If we have a constant, replicate it to all elements and let the // BUILD_VECTOR lowering take care of it. if (Value.getOpcode() == ISD::Constant || Value.getOpcode() == ISD::ConstantFP) { SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value); - return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops); + return DAG.getBuildVector(VT, DL, Ops); } - if (Value.getOpcode() == ISD::UNDEF) + if (Value.isUndef()) return DAG.getUNDEF(VT); return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value); } // Return a vector of type VT in which Op0 is in element 0 and Op1 is in // element 1. Used for cases in which replication is cheap. 
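For reference, the byte-selection semantics getGeneralPermuteNode relies on when it falls back to VPERM (a behavioral model, not backend code): each result byte takes the low five bits of its index byte and selects from the 32-byte concatenation of the two inputs.

#include <cstdint>

static void vperm(const uint8_t a[16], const uint8_t b[16],
                  const uint8_t idx[16], uint8_t out[16]) {
  for (unsigned i = 0; i < 16; ++i) {
    unsigned j = idx[i] & 31; // only the low 5 bits of each index matter
    out[i] = j < 16 ? a[j] : b[j - 16];
  }
}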
-static SDValue buildMergeScalars(SelectionDAG &DAG, SDLoc DL, EVT VT, +static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op0, SDValue Op1) { - if (Op0.getOpcode() == ISD::UNDEF) { - if (Op1.getOpcode() == ISD::UNDEF) + if (Op0.isUndef()) { + if (Op1.isUndef()) return DAG.getUNDEF(VT); return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1); } - if (Op1.getOpcode() == ISD::UNDEF) + if (Op1.isUndef()) return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0); return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT, buildScalarToVector(DAG, DL, VT, Op0), @@ -3809,15 +3964,15 @@ static SDValue buildMergeScalars(SelectionDAG &DAG, SDLoc DL, EVT VT, // Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64 // vector for them. -static SDValue joinDwords(SelectionDAG &DAG, SDLoc DL, SDValue Op0, +static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0, SDValue Op1) { - if (Op0.getOpcode() == ISD::UNDEF && Op1.getOpcode() == ISD::UNDEF) + if (Op0.isUndef() && Op1.isUndef()) return DAG.getUNDEF(MVT::v2i64); // If one of the two inputs is undefined then replicate the other one, // in order to avoid using another register unnecessarily. - if (Op0.getOpcode() == ISD::UNDEF) + if (Op0.isUndef()) Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1); - else if (Op1.getOpcode() == ISD::UNDEF) + else if (Op1.isUndef()) Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); else { Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); @@ -3834,7 +3989,7 @@ static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) { unsigned BytesPerElement = ElemVT.getStoreSize(); for (unsigned I = 0, E = BVN->getNumOperands(); I != E; ++I) { SDValue Op = BVN->getOperand(I); - if (Op.getOpcode() != ISD::UNDEF) { + if (!Op.isUndef()) { uint64_t Value; if (Op.getOpcode() == ISD::Constant) Value = dyn_cast<ConstantSDNode>(Op)->getZExtValue(); @@ -3862,7 +4017,7 @@ static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) { // an empty value. static SDValue tryBuildVectorReplicate(SelectionDAG &DAG, const SystemZInstrInfo *TII, - SDLoc DL, EVT VT, uint64_t Value, + const SDLoc &DL, EVT VT, uint64_t Value, unsigned BitsPerElement) { // Signed 16-bit values can be replicated using VREPI. int64_t SignedValue = SignExtend64(Value, BitsPerElement); @@ -3919,7 +4074,7 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, unsigned Elem = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); GS.add(Op.getOperand(0), Elem); FoundOne = true; - } else if (Op.getOpcode() == ISD::UNDEF) { + } else if (Op.isUndef()) { GS.addUndef(); } else { GS.add(SDValue(), ResidueOps.size()); @@ -3937,7 +4092,7 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType())); for (auto &Op : GS.Ops) { if (!Op.getNode()) { - Op = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BVN), VT, ResidueOps); + Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps); break; } } @@ -3946,14 +4101,14 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, } // Combine GPR scalar values Elems into a vector of type VT. -static SDValue buildVector(SelectionDAG &DAG, SDLoc DL, EVT VT, +static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SmallVectorImpl<SDValue> &Elems) { // See whether there is a single replicated value. 
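tryBuildVectorByteMask's acceptance test, modeled standalone over a flat byte view (bit order as I read the loop: byte 0 lands in the most significant of the mask's 16 used bits; the real code also handles undef elements):

#include <cstdint>

// Every byte must be 0x00 or 0xff for the vector to be a VGBM
// (vector generate byte mask) candidate.
static bool toByteMask(const uint8_t bytes[16], uint16_t &mask) {
  mask = 0;
  for (unsigned i = 0; i < 16; ++i) {
    if (bytes[i] == 0xff)
      mask |= uint16_t(1u << (15 - i));
    else if (bytes[i] != 0)
      return false;
  }
  return true;
}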
SDValue Single; unsigned int NumElements = Elems.size(); unsigned int Count = 0; for (auto Elem : Elems) { - if (Elem.getOpcode() != ISD::UNDEF) { + if (!Elem.isUndef()) { if (!Single.getNode()) Single = Elem; else if (Elem != Single) { @@ -3998,9 +4153,9 @@ static SDValue buildVector(SelectionDAG &DAG, SDLoc DL, EVT VT, SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]); SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]); // Avoid unnecessary undefs by reusing the other operand. - if (Op01.getOpcode() == ISD::UNDEF) + if (Op01.isUndef()) Op01 = Op23; - else if (Op23.getOpcode() == ISD::UNDEF) + else if (Op23.isUndef()) Op23 = Op01; // Merging identical replications is a no-op. if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23) @@ -4034,7 +4189,7 @@ static SDValue buildVector(SelectionDAG &DAG, SDLoc DL, EVT VT, for (unsigned I = 0; I < NumElements; ++I) if (!Constants[I].getNode()) Constants[I] = DAG.getUNDEF(Elems[I].getValueType()); - Result = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Constants); + Result = DAG.getBuildVector(VT, DL, Constants); } else { // Otherwise try to use VLVGP to start the sequence in order to // avoid a false dependency on any previous contents of the vector @@ -4042,8 +4197,8 @@ static SDValue buildVector(SelectionDAG &DAG, SDLoc DL, EVT VT, // is defined. unsigned I1 = NumElements / 2 - 1; unsigned I2 = NumElements - 1; - bool Def1 = (Elems[I1].getOpcode() != ISD::UNDEF); - bool Def2 = (Elems[I2].getOpcode() != ISD::UNDEF); + bool Def1 = !Elems[I1].isUndef(); + bool Def2 = !Elems[I2].isUndef(); if (Def1 || Def2) { SDValue Elem1 = Elems[Def1 ? I1 : I2]; SDValue Elem2 = Elems[Def2 ? I2 : I1]; @@ -4057,7 +4212,7 @@ static SDValue buildVector(SelectionDAG &DAG, SDLoc DL, EVT VT, // Use VLVGx to insert the other elements. for (unsigned I = 0; I < NumElements; ++I) - if (!Done[I] && Elems[I].getOpcode() != ISD::UNDEF) + if (!Done[I] && !Elems[I].isUndef()) Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I], DAG.getConstant(I, DL, MVT::i32)); return Result; @@ -4120,8 +4275,7 @@ SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op, } // See if we should use shuffles to construct the vector from other vectors. - SDValue Res = tryBuildVectorShuffle(DAG, BVN); - if (Res.getNode()) + if (SDValue Res = tryBuildVectorShuffle(DAG, BVN)) return Res; // Detect SCALAR_TO_VECTOR conversions. 
@@ -4312,6 +4466,10 @@ SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG, SDValue SystemZTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { + case ISD::FRAMEADDR: + return lowerFRAMEADDR(Op, DAG); + case ISD::RETURNADDR: + return lowerRETURNADDR(Op, DAG); case ISD::BR_CC: return lowerBR_CC(Op, DAG); case ISD::SELECT_CC: @@ -4336,6 +4494,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, return lowerVACOPY(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return lowerDYNAMIC_STACKALLOC(Op, DAG); + case ISD::GET_DYNAMIC_AREA_OFFSET: + return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG); case ISD::SMUL_LOHI: return lowerSMUL_LOHI(Op, DAG); case ISD::UMUL_LOHI: @@ -4348,12 +4508,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, return lowerOR(Op, DAG); case ISD::CTPOP: return lowerCTPOP(Op, DAG); - case ISD::CTLZ_ZERO_UNDEF: - return DAG.getNode(ISD::CTLZ, SDLoc(Op), - Op.getValueType(), Op.getOperand(0)); - case ISD::CTTZ_ZERO_UNDEF: - return DAG.getNode(ISD::CTTZ, SDLoc(Op), - Op.getValueType(), Op.getOperand(0)); + case ISD::ATOMIC_FENCE: + return lowerATOMIC_FENCE(Op, DAG); case ISD::ATOMIC_SWAP: return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW); case ISD::ATOMIC_STORE: @@ -4457,6 +4613,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(SEARCH_STRING); OPCODE(IPM); OPCODE(SERIALIZE); + OPCODE(MEMBARRIER); OPCODE(TBEGIN); OPCODE(TBEGIN_NOFLOAT); OPCODE(TEND); @@ -4506,6 +4663,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(VISTR_CC); OPCODE(VSTRC_CC); OPCODE(VSTRCZ_CC); + OPCODE(TDC); OPCODE(ATOMIC_SWAPW); OPCODE(ATOMIC_LOADW_ADD); OPCODE(ATOMIC_LOADW_SUB); @@ -4518,6 +4676,8 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(ATOMIC_LOADW_UMIN); OPCODE(ATOMIC_LOADW_UMAX); OPCODE(ATOMIC_CMP_SWAPW); + OPCODE(LRV); + OPCODE(STRV); OPCODE(PREFETCH); } return nullptr; @@ -4535,8 +4695,9 @@ static bool canTreatAsByteVector(EVT VT) { // of the input vector and Index is the index (based on type VecVT) that // should be extracted. Return the new extraction if a simplification // was possible or if Force is true. -SDValue SystemZTargetLowering::combineExtract(SDLoc DL, EVT ResVT, EVT VecVT, - SDValue Op, unsigned Index, +SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT, + EVT VecVT, SDValue Op, + unsigned Index, DAGCombinerInfo &DCI, bool Force) const { SelectionDAG &DAG = DCI.DAG; @@ -4639,9 +4800,8 @@ SDValue SystemZTargetLowering::combineExtract(SDLoc DL, EVT ResVT, EVT VecVT, // Optimize vector operations in scalar value Op on the basis that Op // is truncated to TruncVT. -SDValue -SystemZTargetLowering::combineTruncateExtract(SDLoc DL, EVT TruncVT, SDValue Op, - DAGCombinerInfo &DCI) const { +SDValue SystemZTargetLowering::combineTruncateExtract( + const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const { // If we have (trunc (extract_vector_elt X, Y)), try to turn it into // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements // of type TruncVT. 
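From the source side, the new ATOMIC_FENCE path splits exactly one way (orderings per C++11; instruction choice per lowerATOMIC_FENCE above):

#include <atomic>

void fences() {
  // Sequentially consistent, cross-thread: the one case that emits code,
  // the Serialize node (bcr 15,0, or bcr 14,0 given fast serialization).
  std::atomic_thread_fence(std::memory_order_seq_cst);
  // Weaker fences become MEMBARRIER, a compiler-only barrier that emits
  // no instruction on this strongly ordered target.
  std::atomic_thread_fence(std::memory_order_acquire);
}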
@@ -4675,145 +4835,295 @@ SystemZTargetLowering::combineTruncateExtract(SDLoc DL, EVT TruncVT, SDValue Op, return SDValue(); } -SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, - DAGCombinerInfo &DCI) const { +SDValue SystemZTargetLowering::combineSIGN_EXTEND( + SDNode *N, DAGCombinerInfo &DCI) const { + // Convert (sext (ashr (shl X, C1), C2)) to + // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as + // cheap as narrower ones. SelectionDAG &DAG = DCI.DAG; - unsigned Opcode = N->getOpcode(); - if (Opcode == ISD::SIGN_EXTEND) { - // Convert (sext (ashr (shl X, C1), C2)) to - // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as - // cheap as narrower ones. - SDValue N0 = N->getOperand(0); - EVT VT = N->getValueType(0); - if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) { - auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)); - SDValue Inner = N0.getOperand(0); - if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) { - if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) { - unsigned Extra = (VT.getSizeInBits() - - N0.getValueType().getSizeInBits()); - unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra; - unsigned NewSraAmt = SraAmt->getZExtValue() + Extra; - EVT ShiftVT = N0.getOperand(1).getValueType(); - SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT, - Inner.getOperand(0)); - SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext, - DAG.getConstant(NewShlAmt, SDLoc(Inner), - ShiftVT)); - return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, - DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT)); - } + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) { + auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + SDValue Inner = N0.getOperand(0); + if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) { + if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) { + unsigned Extra = (VT.getSizeInBits() - + N0.getValueType().getSizeInBits()); + unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra; + unsigned NewSraAmt = SraAmt->getZExtValue() + Extra; + EVT ShiftVT = N0.getOperand(1).getValueType(); + SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT, + Inner.getOperand(0)); + SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext, + DAG.getConstant(NewShlAmt, SDLoc(Inner), + ShiftVT)); + return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, + DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT)); } } } - if (Opcode == SystemZISD::MERGE_HIGH || - Opcode == SystemZISD::MERGE_LOW) { - SDValue Op0 = N->getOperand(0); - SDValue Op1 = N->getOperand(1); - if (Op0.getOpcode() == ISD::BITCAST) - Op0 = Op0.getOperand(0); - if (Op0.getOpcode() == SystemZISD::BYTE_MASK && - cast<ConstantSDNode>(Op0.getOperand(0))->getZExtValue() == 0) { - // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF - // for v4f32. - if (Op1 == N->getOperand(0)) - return Op1; - // (z_merge_? 0, X) -> (z_unpackl_? 0, X). - EVT VT = Op1.getValueType(); - unsigned ElemBytes = VT.getVectorElementType().getStoreSize(); - if (ElemBytes <= 4) { - Opcode = (Opcode == SystemZISD::MERGE_HIGH ? 
- SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW); - EVT InVT = VT.changeVectorElementTypeToInteger(); - EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16), - SystemZ::VectorBytes / ElemBytes / 2); - if (VT != InVT) { - Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1); - DCI.AddToWorklist(Op1.getNode()); - } - SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1); - DCI.AddToWorklist(Op.getNode()); - return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); + return SDValue(); +} + +SDValue SystemZTargetLowering::combineMERGE( + SDNode *N, DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + unsigned Opcode = N->getOpcode(); + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + if (Op0.getOpcode() == ISD::BITCAST) + Op0 = Op0.getOperand(0); + if (Op0.getOpcode() == SystemZISD::BYTE_MASK && + cast<ConstantSDNode>(Op0.getOperand(0))->getZExtValue() == 0) { + // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF + // for v4f32. + if (Op1 == N->getOperand(0)) + return Op1; + // (z_merge_? 0, X) -> (z_unpackl_? 0, X). + EVT VT = Op1.getValueType(); + unsigned ElemBytes = VT.getVectorElementType().getStoreSize(); + if (ElemBytes <= 4) { + Opcode = (Opcode == SystemZISD::MERGE_HIGH ? + SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW); + EVT InVT = VT.changeVectorElementTypeToInteger(); + EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16), + SystemZ::VectorBytes / ElemBytes / 2); + if (VT != InVT) { + Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1); + DCI.AddToWorklist(Op1.getNode()); } + SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1); + DCI.AddToWorklist(Op.getNode()); + return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); } } + return SDValue(); +} + +SDValue SystemZTargetLowering::combineSTORE( + SDNode *N, DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + auto *SN = cast<StoreSDNode>(N); + auto &Op1 = N->getOperand(1); + EVT MemVT = SN->getMemoryVT(); // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better // for the extraction to be done on a vMiN value, so that we can use VSTE. // If X has wider elements then convert it to: // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z). - if (Opcode == ISD::STORE) { - auto *SN = cast<StoreSDNode>(N); - EVT MemVT = SN->getMemoryVT(); - if (MemVT.isInteger()) { - SDValue Value = combineTruncateExtract(SDLoc(N), MemVT, - SN->getValue(), DCI); - if (Value.getNode()) { - DCI.AddToWorklist(Value.getNode()); - - // Rewrite the store with the new form of stored value. - return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value, - SN->getBasePtr(), SN->getMemoryVT(), - SN->getMemOperand()); - } + if (MemVT.isInteger()) { + if (SDValue Value = + combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) { + DCI.AddToWorklist(Value.getNode()); + + // Rewrite the store with the new form of stored value. + return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value, + SN->getBasePtr(), SN->getMemoryVT(), + SN->getMemOperand()); } } - // Try to simplify a vector extraction. - if (Opcode == ISD::EXTRACT_VECTOR_ELT) { - if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) { - SDValue Op0 = N->getOperand(0); - EVT VecVT = Op0.getValueType(); - return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0, - IndexN->getZExtValue(), DCI, false); + // Combine STORE (BSWAP) into STRVH/STRV/STRVG + // See comment in combineBSWAP about volatile accesses. 
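The store-side pattern now selected as a single instruction (its load-side twin is combineBSWAP below):

#include <cstdint>

// STORE (BSWAP x) -> STRV: a byte-reversing store replaces the separate
// register swap plus plain store (STRVH/STRVG for the i16/i64 widths).
void store_swapped(uint32_t *p, uint32_t v) {
  *p = __builtin_bswap32(v); // -> strv %rV, 0(%rP)
}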
+ if (!SN->isVolatile() && + Op1.getOpcode() == ISD::BSWAP && + Op1.getNode()->hasOneUse() && + (Op1.getValueType() == MVT::i16 || + Op1.getValueType() == MVT::i32 || + Op1.getValueType() == MVT::i64)) { + + SDValue BSwapOp = Op1.getOperand(0); + + if (BSwapOp.getValueType() == MVT::i16) + BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp); + + SDValue Ops[] = { + N->getOperand(0), BSwapOp, N->getOperand(2), + DAG.getValueType(Op1.getValueType()) + }; + + return + DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other), + Ops, MemVT, SN->getMemOperand()); } + return SDValue(); +} + +SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT( + SDNode *N, DAGCombinerInfo &DCI) const { + // Try to simplify a vector extraction. + if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) { + SDValue Op0 = N->getOperand(0); + EVT VecVT = Op0.getValueType(); + return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0, + IndexN->getZExtValue(), DCI, false); } + return SDValue(); +} + +SDValue SystemZTargetLowering::combineJOIN_DWORDS( + SDNode *N, DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; // (join_dwords X, X) == (replicate X) - if (Opcode == SystemZISD::JOIN_DWORDS && - N->getOperand(0) == N->getOperand(1)) + if (N->getOperand(0) == N->getOperand(1)) return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0), N->getOperand(0)); + return SDValue(); +} + +SDValue SystemZTargetLowering::combineFP_ROUND( + SDNode *N, DAGCombinerInfo &DCI) const { // (fround (extract_vector_elt X 0)) // (fround (extract_vector_elt X 1)) -> // (extract_vector_elt (VROUND X) 0) // (extract_vector_elt (VROUND X) 1) // // This is a special case since the target doesn't really support v2f32s. - if (Opcode == ISD::FP_ROUND) { - SDValue Op0 = N->getOperand(0); - if (N->getValueType(0) == MVT::f32 && - Op0.hasOneUse() && - Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && - Op0.getOperand(0).getValueType() == MVT::v2f64 && - Op0.getOperand(1).getOpcode() == ISD::Constant && - cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) { - SDValue Vec = Op0.getOperand(0); - for (auto *U : Vec->uses()) { - if (U != Op0.getNode() && - U->hasOneUse() && - U->getOpcode() == ISD::EXTRACT_VECTOR_ELT && - U->getOperand(0) == Vec && - U->getOperand(1).getOpcode() == ISD::Constant && - cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) { - SDValue OtherRound = SDValue(*U->use_begin(), 0); - if (OtherRound.getOpcode() == ISD::FP_ROUND && - OtherRound.getOperand(0) == SDValue(U, 0) && - OtherRound.getValueType() == MVT::f32) { - SDValue VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N), - MVT::v4f32, Vec); - DCI.AddToWorklist(VRound.getNode()); - SDValue Extract1 = - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32, - VRound, DAG.getConstant(2, SDLoc(U), MVT::i32)); - DCI.AddToWorklist(Extract1.getNode()); - DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1); - SDValue Extract0 = - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, - VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32)); - return Extract0; - } + SelectionDAG &DAG = DCI.DAG; + SDValue Op0 = N->getOperand(0); + if (N->getValueType(0) == MVT::f32 && + Op0.hasOneUse() && + Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + Op0.getOperand(0).getValueType() == MVT::v2f64 && + Op0.getOperand(1).getOpcode() == ISD::Constant && + cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) { + SDValue Vec = Op0.getOperand(0); + for (auto *U : Vec->uses()) { + if (U != Op0.getNode() && 
+          U->hasOneUse() &&
+          U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+          U->getOperand(0) == Vec &&
+          U->getOperand(1).getOpcode() == ISD::Constant &&
+          cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) {
+        SDValue OtherRound = SDValue(*U->use_begin(), 0);
+        if (OtherRound.getOpcode() == ISD::FP_ROUND &&
+            OtherRound.getOperand(0) == SDValue(U, 0) &&
+            OtherRound.getValueType() == MVT::f32) {
+          SDValue VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
+                                       MVT::v4f32, Vec);
+          DCI.AddToWorklist(VRound.getNode());
+          SDValue Extract1 =
+            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
+                        VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
+          DCI.AddToWorklist(Extract1.getNode());
+          DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
+          SDValue Extract0 =
+            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
+                        VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
+          return Extract0;
+        }
+      }
+    }
+  }
+  return SDValue();
+}
+
+SDValue SystemZTargetLowering::combineBSWAP(
+    SDNode *N, DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+  // Combine BSWAP (LOAD) into LRVH/LRV/LRVG.
+  // The combine replaces the original load node, so it must not be
+  // applied to volatile loads.
+  if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
+      N->getOperand(0).hasOneUse() &&
+      (N->getValueType(0) == MVT::i16 || N->getValueType(0) == MVT::i32 ||
+       N->getValueType(0) == MVT::i64) &&
+      !cast<LoadSDNode>(N->getOperand(0))->isVolatile()) {
+    SDValue Load = N->getOperand(0);
+    LoadSDNode *LD = cast<LoadSDNode>(Load);
+
+    // Create the byte-swapping load.
+    SDValue Ops[] = {
+      LD->getChain(),                      // Chain
+      LD->getBasePtr(),                    // Ptr
+      DAG.getValueType(N->getValueType(0)) // VT
+    };
+    SDValue BSLoad =
+      DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
+                              DAG.getVTList(N->getValueType(0) == MVT::i64 ?
+                                            MVT::i64 : MVT::i32, MVT::Other),
+                              Ops, LD->getMemoryVT(), LD->getMemOperand());
+
+    // If this is an i16 load, insert the truncate.
+    SDValue ResVal = BSLoad;
+    if (N->getValueType(0) == MVT::i16)
+      ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);
+
+    // First, combine the bswap away. This makes the value produced by the
+    // load dead.
+    DCI.CombineTo(N, ResVal);
+
+    // Next, combine the load away; we give it a bogus result value but a
+    // real chain result. The result value is dead because the bswap is dead.
+    DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
+
+    // Return N so it doesn't get rechecked!
+    return SDValue(N, 0);
+  }
+  return SDValue();
+}
+
+SDValue SystemZTargetLowering::combineSHIFTROT(
+    SDNode *N, DAGCombinerInfo &DCI) const {
+
+  SelectionDAG &DAG = DCI.DAG;
+
+  // Shift/rotate instructions only use the last 6 bits of the second operand
+  // register. If the second operand is the result of an AND with an immediate
+  // value that has its last 6 bits set, we can safely remove the AND operation.
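+  //
+  // For example (illustrative only, not part of the patch):
+  //   (shl X, (and Y, 63))  ->  (shl X, Y)
+  // is safe because the shift reads only the low 6 bits of the amount,
+  // whereas an AND with 31 must be kept, since removing it would let
+  // bit 5 of Y reach the shift amount and change the result.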
+ SDValue N1 = N->getOperand(1); + if (N1.getOpcode() == ISD::AND) { + auto *AndMask = dyn_cast<ConstantSDNode>(N1.getOperand(1)); + + // The AND mask is constant + if (AndMask) { + auto AmtVal = AndMask->getZExtValue(); + + // Bottom 6 bits are set + if ((AmtVal & 0x3f) == 0x3f) { + SDValue AndOp = N1->getOperand(0); + + // This is the only use, so remove the node + if (N1.hasOneUse()) { + // Combine the AND away + DCI.CombineTo(N1.getNode(), AndOp); + + // Return N so it isn't rechecked + return SDValue(N, 0); + + // The node will be reused, so create a new node for this one use + } else { + SDValue Replace = DAG.getNode(N->getOpcode(), SDLoc(N), + N->getValueType(0), N->getOperand(0), + AndOp); + DCI.AddToWorklist(Replace.getNode()); + + return Replace; } } } } + + return SDValue(); +} + +SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + switch(N->getOpcode()) { + default: break; + case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI); + case SystemZISD::MERGE_HIGH: + case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI); + case ISD::STORE: return combineSTORE(N, DCI); + case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI); + case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI); + case ISD::FP_ROUND: return combineFP_ROUND(N, DCI); + case ISD::BSWAP: return combineBSWAP(N, DCI); + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::ROTL: return combineSHIFTROT(N, DCI); + } + return SDValue(); } @@ -4831,7 +5141,7 @@ static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) { // Split MBB after MI and return the new block (the one that contains // instructions after MI). -static MachineBasicBlock *splitBlockAfter(MachineInstr *MI, +static MachineBasicBlock *splitBlockAfter(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB) { MachineBasicBlock *NewMBB = emitBlockAfter(MBB); NewMBB->splice(NewMBB->begin(), MBB, @@ -4841,7 +5151,7 @@ static MachineBasicBlock *splitBlockAfter(MachineInstr *MI, } // Split MBB before MI and return the new block (the one that contains MI). -static MachineBasicBlock *splitBlockBefore(MachineInstr *MI, +static MachineBasicBlock *splitBlockBefore(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB) { MachineBasicBlock *NewMBB = emitBlockAfter(MBB); NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end()); @@ -4850,34 +5160,36 @@ static MachineBasicBlock *splitBlockBefore(MachineInstr *MI, } // Force base value Base into a register before MI. Return the register. -static unsigned forceReg(MachineInstr *MI, MachineOperand &Base, +static unsigned forceReg(MachineInstr &MI, MachineOperand &Base, const SystemZInstrInfo *TII) { if (Base.isReg()) return Base.getReg(); - MachineBasicBlock *MBB = MI->getParent(); + MachineBasicBlock *MBB = MI.getParent(); MachineFunction &MF = *MBB->getParent(); MachineRegisterInfo &MRI = MF.getRegInfo(); unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); - BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LA), Reg) - .addOperand(Base).addImm(0).addReg(0); + BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg) + .addOperand(Base) + .addImm(0) + .addReg(0); return Reg; } // Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI. 
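// As a sketch (not part of the patch), the expansion below builds the
// usual select diamond, with block names as in the code:
//
//   StartMBB:  BRC CCValid, CCMask, JoinMBB   ; condition is already in CC
//   FalseMBB:  (empty, falls through)
//   JoinMBB:   DestReg = PHI [TrueReg, StartMBB], [FalseReg, FalseMBB]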
MachineBasicBlock * -SystemZTargetLowering::emitSelect(MachineInstr *MI, +SystemZTargetLowering::emitSelect(MachineInstr &MI, MachineBasicBlock *MBB) const { const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); - unsigned DestReg = MI->getOperand(0).getReg(); - unsigned TrueReg = MI->getOperand(1).getReg(); - unsigned FalseReg = MI->getOperand(2).getReg(); - unsigned CCValid = MI->getOperand(3).getImm(); - unsigned CCMask = MI->getOperand(4).getImm(); - DebugLoc DL = MI->getDebugLoc(); + unsigned DestReg = MI.getOperand(0).getReg(); + unsigned TrueReg = MI.getOperand(1).getReg(); + unsigned FalseReg = MI.getOperand(2).getReg(); + unsigned CCValid = MI.getOperand(3).getImm(); + unsigned CCMask = MI.getOperand(4).getImm(); + DebugLoc DL = MI.getDebugLoc(); MachineBasicBlock *StartMBB = MBB; MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB); @@ -4905,7 +5217,7 @@ SystemZTargetLowering::emitSelect(MachineInstr *MI, .addReg(TrueReg).addMBB(StartMBB) .addReg(FalseReg).addMBB(FalseMBB); - MI->eraseFromParent(); + MI.eraseFromParent(); return JoinMBB; } @@ -4913,21 +5225,21 @@ SystemZTargetLowering::emitSelect(MachineInstr *MI, // StoreOpcode is the store to use and Invert says whether the store should // happen when the condition is false rather than true. If a STORE ON // CONDITION is available, STOCOpcode is its opcode, otherwise it is 0. -MachineBasicBlock * -SystemZTargetLowering::emitCondStore(MachineInstr *MI, - MachineBasicBlock *MBB, - unsigned StoreOpcode, unsigned STOCOpcode, - bool Invert) const { +MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI, + MachineBasicBlock *MBB, + unsigned StoreOpcode, + unsigned STOCOpcode, + bool Invert) const { const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); - unsigned SrcReg = MI->getOperand(0).getReg(); - MachineOperand Base = MI->getOperand(1); - int64_t Disp = MI->getOperand(2).getImm(); - unsigned IndexReg = MI->getOperand(3).getReg(); - unsigned CCValid = MI->getOperand(4).getImm(); - unsigned CCMask = MI->getOperand(5).getImm(); - DebugLoc DL = MI->getDebugLoc(); + unsigned SrcReg = MI.getOperand(0).getReg(); + MachineOperand Base = MI.getOperand(1); + int64_t Disp = MI.getOperand(2).getImm(); + unsigned IndexReg = MI.getOperand(3).getReg(); + unsigned CCValid = MI.getOperand(4).getImm(); + unsigned CCMask = MI.getOperand(5).getImm(); + DebugLoc DL = MI.getDebugLoc(); StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp); @@ -4940,7 +5252,7 @@ SystemZTargetLowering::emitCondStore(MachineInstr *MI, BuildMI(*MBB, MI, DL, TII->get(STOCOpcode)) .addReg(SrcReg).addOperand(Base).addImm(Disp) .addImm(CCValid).addImm(CCMask); - MI->eraseFromParent(); + MI.eraseFromParent(); return MBB; } @@ -4969,7 +5281,7 @@ SystemZTargetLowering::emitCondStore(MachineInstr *MI, .addReg(SrcReg).addOperand(Base).addImm(Disp).addReg(IndexReg); MBB->addSuccessor(JoinMBB); - MI->eraseFromParent(); + MI.eraseFromParent(); return JoinMBB; } @@ -4980,12 +5292,9 @@ SystemZTargetLowering::emitCondStore(MachineInstr *MI, // ATOMIC_LOADW_* or ATOMIC_SWAPW instruction, in which case the bitsize // is one of the operands. Invert says whether the field should be // inverted after performing BinOpcode (e.g. for NAND). 
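// As a sketch (register names illustrative), the expansion forms the
// usual compare-and-swap retry loop:
//
//   StartMBB:  OrigVal = load Disp(Base)
//   LoopMBB:   OldVal = PHI [OrigVal, StartMBB], [Dest, LoopMBB]
//              NewVal = BinOpcode OldVal, Src2   ; complemented if Invert
//              Dest   = CS/CSG OldVal, NewVal, Disp(Base)
//              branch back to LoopMBB while the CS reports a mismatch
//   DoneMBB:   Dest holds the old value (rotated out again for subwords)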
-MachineBasicBlock * -SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI, - MachineBasicBlock *MBB, - unsigned BinOpcode, - unsigned BitSize, - bool Invert) const { +MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary( + MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode, + unsigned BitSize, bool Invert) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); @@ -4994,15 +5303,15 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI, // Extract the operands. Base can be a register or a frame index. // Src2 can be a register or immediate. - unsigned Dest = MI->getOperand(0).getReg(); - MachineOperand Base = earlyUseOperand(MI->getOperand(1)); - int64_t Disp = MI->getOperand(2).getImm(); - MachineOperand Src2 = earlyUseOperand(MI->getOperand(3)); - unsigned BitShift = (IsSubWord ? MI->getOperand(4).getReg() : 0); - unsigned NegBitShift = (IsSubWord ? MI->getOperand(5).getReg() : 0); - DebugLoc DL = MI->getDebugLoc(); + unsigned Dest = MI.getOperand(0).getReg(); + MachineOperand Base = earlyUseOperand(MI.getOperand(1)); + int64_t Disp = MI.getOperand(2).getImm(); + MachineOperand Src2 = earlyUseOperand(MI.getOperand(3)); + unsigned BitShift = (IsSubWord ? MI.getOperand(4).getReg() : 0); + unsigned NegBitShift = (IsSubWord ? MI.getOperand(5).getReg() : 0); + DebugLoc DL = MI.getDebugLoc(); if (IsSubWord) - BitSize = MI->getOperand(6).getImm(); + BitSize = MI.getOperand(6).getImm(); // Subword operations use 32-bit registers. const TargetRegisterClass *RC = (BitSize <= 32 ? @@ -5090,7 +5399,7 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI, MBB->addSuccessor(LoopMBB); MBB->addSuccessor(DoneMBB); - MI->eraseFromParent(); + MI.eraseFromParent(); return DoneMBB; } @@ -5100,12 +5409,9 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI, // minimum or maximum value. KeepOldMask is the BRC condition-code mask // for when the current field should be kept. BitSize is the width of // the field in bits, or 0 if this is a partword ATOMIC_LOADW_* instruction. -MachineBasicBlock * -SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI, - MachineBasicBlock *MBB, - unsigned CompareOpcode, - unsigned KeepOldMask, - unsigned BitSize) const { +MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax( + MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode, + unsigned KeepOldMask, unsigned BitSize) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); @@ -5113,15 +5419,15 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI, bool IsSubWord = (BitSize < 32); // Extract the operands. Base can be a register or a frame index. - unsigned Dest = MI->getOperand(0).getReg(); - MachineOperand Base = earlyUseOperand(MI->getOperand(1)); - int64_t Disp = MI->getOperand(2).getImm(); - unsigned Src2 = MI->getOperand(3).getReg(); - unsigned BitShift = (IsSubWord ? MI->getOperand(4).getReg() : 0); - unsigned NegBitShift = (IsSubWord ? MI->getOperand(5).getReg() : 0); - DebugLoc DL = MI->getDebugLoc(); + unsigned Dest = MI.getOperand(0).getReg(); + MachineOperand Base = earlyUseOperand(MI.getOperand(1)); + int64_t Disp = MI.getOperand(2).getImm(); + unsigned Src2 = MI.getOperand(3).getReg(); + unsigned BitShift = (IsSubWord ? MI.getOperand(4).getReg() : 0); + unsigned NegBitShift = (IsSubWord ? 
MI.getOperand(5).getReg() : 0); + DebugLoc DL = MI.getDebugLoc(); if (IsSubWord) - BitSize = MI->getOperand(6).getImm(); + BitSize = MI.getOperand(6).getImm(); // Subword operations use 32-bit registers. const TargetRegisterClass *RC = (BitSize <= 32 ? @@ -5209,30 +5515,31 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI, MBB->addSuccessor(LoopMBB); MBB->addSuccessor(DoneMBB); - MI->eraseFromParent(); + MI.eraseFromParent(); return DoneMBB; } // Implement EmitInstrWithCustomInserter for pseudo ATOMIC_CMP_SWAPW // instruction MI. MachineBasicBlock * -SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI, +SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI, MachineBasicBlock *MBB) const { + MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); MachineRegisterInfo &MRI = MF.getRegInfo(); // Extract the operands. Base can be a register or a frame index. - unsigned Dest = MI->getOperand(0).getReg(); - MachineOperand Base = earlyUseOperand(MI->getOperand(1)); - int64_t Disp = MI->getOperand(2).getImm(); - unsigned OrigCmpVal = MI->getOperand(3).getReg(); - unsigned OrigSwapVal = MI->getOperand(4).getReg(); - unsigned BitShift = MI->getOperand(5).getReg(); - unsigned NegBitShift = MI->getOperand(6).getReg(); - int64_t BitSize = MI->getOperand(7).getImm(); - DebugLoc DL = MI->getDebugLoc(); + unsigned Dest = MI.getOperand(0).getReg(); + MachineOperand Base = earlyUseOperand(MI.getOperand(1)); + int64_t Disp = MI.getOperand(2).getImm(); + unsigned OrigCmpVal = MI.getOperand(3).getReg(); + unsigned OrigSwapVal = MI.getOperand(4).getReg(); + unsigned BitShift = MI.getOperand(5).getReg(); + unsigned NegBitShift = MI.getOperand(6).getReg(); + int64_t BitSize = MI.getOperand(7).getImm(); + DebugLoc DL = MI.getDebugLoc(); const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass; @@ -5323,7 +5630,7 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI, MBB->addSuccessor(LoopMBB); MBB->addSuccessor(DoneMBB); - MI->eraseFromParent(); + MI.eraseFromParent(); return DoneMBB; } @@ -5331,18 +5638,18 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI, // if the high register of the GR128 value must be cleared or false if // it's "don't care". SubReg is subreg_l32 when extending a GR32 // and subreg_l64 when extending a GR64. 
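// As an illustration, an any-extension from a GR64 source becomes:
//
//   In128 = IMPLICIT_DEF
//   Dest  = INSERT_SUBREG In128, Src, subreg_l64
//
// with ClearEven additionally zeroing the even (high) 64-bit half of
// In128 first, as needed for zero extension.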
-MachineBasicBlock * -SystemZTargetLowering::emitExt128(MachineInstr *MI, - MachineBasicBlock *MBB, - bool ClearEven, unsigned SubReg) const { +MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI, + MachineBasicBlock *MBB, + bool ClearEven, + unsigned SubReg) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); MachineRegisterInfo &MRI = MF.getRegInfo(); - DebugLoc DL = MI->getDebugLoc(); + DebugLoc DL = MI.getDebugLoc(); - unsigned Dest = MI->getOperand(0).getReg(); - unsigned Src = MI->getOperand(1).getReg(); + unsigned Dest = MI.getOperand(0).getReg(); + unsigned Src = MI.getOperand(1).getReg(); unsigned In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128); @@ -5359,25 +5666,23 @@ SystemZTargetLowering::emitExt128(MachineInstr *MI, BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest) .addReg(In128).addReg(Src).addImm(SubReg); - MI->eraseFromParent(); + MI.eraseFromParent(); return MBB; } -MachineBasicBlock * -SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI, - MachineBasicBlock *MBB, - unsigned Opcode) const { +MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper( + MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); MachineRegisterInfo &MRI = MF.getRegInfo(); - DebugLoc DL = MI->getDebugLoc(); + DebugLoc DL = MI.getDebugLoc(); - MachineOperand DestBase = earlyUseOperand(MI->getOperand(0)); - uint64_t DestDisp = MI->getOperand(1).getImm(); - MachineOperand SrcBase = earlyUseOperand(MI->getOperand(2)); - uint64_t SrcDisp = MI->getOperand(3).getImm(); - uint64_t Length = MI->getOperand(4).getImm(); + MachineOperand DestBase = earlyUseOperand(MI.getOperand(0)); + uint64_t DestDisp = MI.getOperand(1).getImm(); + MachineOperand SrcBase = earlyUseOperand(MI.getOperand(2)); + uint64_t SrcDisp = MI.getOperand(3).getImm(); + uint64_t Length = MI.getOperand(4).getImm(); // When generating more than one CLC, all but the last will need to // branch to the end when a difference is found. @@ -5385,10 +5690,10 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI, splitBlockAfter(MI, MBB) : nullptr); // Check for the loop form, in which operand 5 is the trip count. - if (MI->getNumExplicitOperands() > 5) { + if (MI.getNumExplicitOperands() > 5) { bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase); - uint64_t StartCountReg = MI->getOperand(5).getReg(); + uint64_t StartCountReg = MI.getOperand(5).getReg(); uint64_t StartSrcReg = forceReg(MI, SrcBase, TII); uint64_t StartDestReg = (HaveSingleBase ? StartSrcReg : forceReg(MI, DestBase, TII)); @@ -5491,15 +5796,19 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI, // Apply them using LAY if so. 
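// For instance, if DestDisp does not fit in 12 bits, the MVC below is
// rewritten from
//   MVC  DestDisp(Length,DestBase), SrcDisp(SrcBase)
// into
//   LAY  Reg, DestDisp(DestBase)
//   MVC  0(Length,Reg), SrcDisp(SrcBase)
// and likewise for SrcDisp.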
if (!isUInt<12>(DestDisp)) { unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); - BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LAY), Reg) - .addOperand(DestBase).addImm(DestDisp).addReg(0); + BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg) + .addOperand(DestBase) + .addImm(DestDisp) + .addReg(0); DestBase = MachineOperand::CreateReg(Reg, false); DestDisp = 0; } if (!isUInt<12>(SrcDisp)) { unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); - BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LAY), Reg) - .addOperand(SrcBase).addImm(SrcDisp).addReg(0); + BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg) + .addOperand(SrcBase) + .addImm(SrcDisp) + .addReg(0); SrcBase = MachineOperand::CreateReg(Reg, false); SrcDisp = 0; } @@ -5527,26 +5836,24 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI, MBB->addLiveIn(SystemZ::CC); } - MI->eraseFromParent(); + MI.eraseFromParent(); return MBB; } // Decompose string pseudo-instruction MI into a loop that continually performs // Opcode until CC != 3. -MachineBasicBlock * -SystemZTargetLowering::emitStringWrapper(MachineInstr *MI, - MachineBasicBlock *MBB, - unsigned Opcode) const { +MachineBasicBlock *SystemZTargetLowering::emitStringWrapper( + MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); MachineRegisterInfo &MRI = MF.getRegInfo(); - DebugLoc DL = MI->getDebugLoc(); + DebugLoc DL = MI.getDebugLoc(); - uint64_t End1Reg = MI->getOperand(0).getReg(); - uint64_t Start1Reg = MI->getOperand(1).getReg(); - uint64_t Start2Reg = MI->getOperand(2).getReg(); - uint64_t CharReg = MI->getOperand(3).getReg(); + uint64_t End1Reg = MI.getOperand(0).getReg(); + uint64_t Start1Reg = MI.getOperand(1).getReg(); + uint64_t Start2Reg = MI.getOperand(2).getReg(); + uint64_t CharReg = MI.getOperand(3).getReg(); const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass; uint64_t This1Reg = MRI.createVirtualRegister(RC); @@ -5589,26 +5896,24 @@ SystemZTargetLowering::emitStringWrapper(MachineInstr *MI, DoneMBB->addLiveIn(SystemZ::CC); - MI->eraseFromParent(); + MI.eraseFromParent(); return DoneMBB; } // Update TBEGIN instruction with final opcode and register clobbers. -MachineBasicBlock * -SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI, - MachineBasicBlock *MBB, - unsigned Opcode, - bool NoFloat) const { +MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin( + MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode, + bool NoFloat) const { MachineFunction &MF = *MBB->getParent(); const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); // Update opcode. - MI->setDesc(TII->get(Opcode)); + MI.setDesc(TII->get(Opcode)); // We cannot handle a TBEGIN that clobbers the stack or frame pointer. // Make sure to add the corresponding GRSM bits if they are missing. 
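// Each GRSM bit covers an even/odd register pair, which is why
// GPRControlBit[] below maps both members of a pair to the same mask bit
// (e.g. indices 14 and 15 both map to 0x0100, the %r14/%r15 pair).
// Forcing the bits for %r15 (and %r11 when a frame pointer is in use)
// makes TBEGIN save those pairs rather than treat them as clobbered.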
- uint64_t Control = MI->getOperand(2).getImm(); + uint64_t Control = MI.getOperand(2).getImm(); static const unsigned GPRControlBit[16] = { 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000, 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100 @@ -5616,13 +5921,13 @@ SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI, Control |= GPRControlBit[15]; if (TFI->hasFP(MF)) Control |= GPRControlBit[11]; - MI->getOperand(2).setImm(Control); + MI.getOperand(2).setImm(Control); // Add GPR clobbers. for (int I = 0; I < 16; I++) { if ((Control & GPRControlBit[I]) == 0) { unsigned Reg = SystemZMC::GR64Regs[I]; - MI->addOperand(MachineOperand::CreateReg(Reg, true, true)); + MI.addOperand(MachineOperand::CreateReg(Reg, true, true)); } } @@ -5631,12 +5936,12 @@ SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI, if (Subtarget.hasVector()) { for (int I = 0; I < 32; I++) { unsigned Reg = SystemZMC::VR128Regs[I]; - MI->addOperand(MachineOperand::CreateReg(Reg, true, true)); + MI.addOperand(MachineOperand::CreateReg(Reg, true, true)); } } else { for (int I = 0; I < 16; I++) { unsigned Reg = SystemZMC::FP64Regs[I]; - MI->addOperand(MachineOperand::CreateReg(Reg, true, true)); + MI.addOperand(MachineOperand::CreateReg(Reg, true, true)); } } } @@ -5644,17 +5949,15 @@ SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI, return MBB; } -MachineBasicBlock * -SystemZTargetLowering::emitLoadAndTestCmp0(MachineInstr *MI, - MachineBasicBlock *MBB, - unsigned Opcode) const { +MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0( + MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const { MachineFunction &MF = *MBB->getParent(); MachineRegisterInfo *MRI = &MF.getRegInfo(); const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); - DebugLoc DL = MI->getDebugLoc(); + DebugLoc DL = MI.getDebugLoc(); - unsigned SrcReg = MI->getOperand(0).getReg(); + unsigned SrcReg = MI.getOperand(0).getReg(); // Create new virtual register of the same class as source. const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); @@ -5664,14 +5967,14 @@ SystemZTargetLowering::emitLoadAndTestCmp0(MachineInstr *MI, // well. BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg) .addReg(SrcReg); - MI->eraseFromParent(); + MI.eraseFromParent(); return MBB; } -MachineBasicBlock *SystemZTargetLowering:: -EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { - switch (MI->getOpcode()) { +MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter( + MachineInstr &MI, MachineBasicBlock *MBB) const { + switch (MI.getOpcode()) { case SystemZ::Select32Mux: case SystemZ::Select32: case SystemZ::SelectF32: diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h index 391636e5467f..b1de8936beed 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -146,6 +146,9 @@ enum NodeType : unsigned { // Perform a serialization operation. (BCR 15,0 or BCR 14,0.) SERIALIZE, + // Compiler barrier only; generate a no-op. + MEMBARRIER, + // Transaction begin. The first operand is the chain, the second // the TDB pointer, and the third the immediate control field. // Returns chain and glue. @@ -275,6 +278,12 @@ enum NodeType : unsigned { VSTRC_CC, VSTRCZ_CC, + // Test Data Class. 
+ // + // Operand 0: the value to test + // Operand 1: the bit mask + TDC, + // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or // ATOMIC_LOAD_<op>. // @@ -308,6 +317,19 @@ enum NodeType : unsigned { // Operand 5: the width of the field in bits (8 or 16) ATOMIC_CMP_SWAPW, + // Byte swapping load. + // + // Operand 0: the address to load from + // Operand 1: the type of load (i16, i32, i64) + LRV, + + // Byte swapping store. + // + // Operand 0: the value to store + // Operand 1: the address to store to + // Operand 2: the type of store (i16, i32, i64) + STRV, + // Prefetch from the second operand using the 4-bit control code in // the first operand. The code is 1 for a load prefetch and 2 for // a store prefetch. @@ -423,16 +445,23 @@ public: return SystemZ::R7D; } - MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB) const - override; + /// Override to support customized stack guard loading. + bool useLoadStackGuardNode() const override { + return true; + } + void insertSSPDeclarations(Module &M) const override { + } + + MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr &MI, + MachineBasicBlock *BB) const override; SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; bool allowTruncateForTailCall(Type *, Type *) const override; bool mayBeEmittedAsTailCall(CallInst *CI) const override; SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, - SDLoc DL, SelectionDAG &DAG, + const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const override; SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const override; @@ -443,12 +472,20 @@ public: LLVMContext &Context) const override; SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - SDLoc DL, SelectionDAG &DAG) const override; - SDValue prepareVolatileOrAtomicLoad(SDValue Chain, SDLoc DL, + const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, + SelectionDAG &DAG) const override; + SDValue prepareVolatileOrAtomicLoad(SDValue Chain, const SDLoc &DL, SelectionDAG &DAG) const override; SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; + ISD::NodeType getExtendForAtomicOps() const override { + return ISD::ANY_EXTEND; + } + + bool supportSwiftError() const override { + return true; + } + private: const SystemZSubtarget &Subtarget; @@ -461,15 +498,19 @@ private: SDValue lowerTLSGetOffset(GlobalAddressSDNode *Node, SelectionDAG &DAG, unsigned Opcode, SDValue GOTOffset) const; + SDValue lowerThreadPointer(const SDLoc &DL, SelectionDAG &DAG) const; SDValue lowerGlobalTLSAddress(GlobalAddressSDNode *Node, SelectionDAG &DAG) const; SDValue lowerBlockAddress(BlockAddressSDNode *Node, SelectionDAG &DAG) const; SDValue lowerJumpTable(JumpTableSDNode *JT, SelectionDAG &DAG) const; SDValue lowerConstantPool(ConstantPoolSDNode *CP, SelectionDAG &DAG) const; + SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVACOPY(SDValue Op, SelectionDAG &DAG) const; SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; SDValue lowerUMUL_LOHI(SDValue 
Op, SelectionDAG &DAG) const; SDValue lowerSDIVREM(SDValue Op, SelectionDAG &DAG) const; @@ -477,6 +518,7 @@ private: SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const; SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerCTPOP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const; SDValue lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG, @@ -498,11 +540,19 @@ private: unsigned UnpackHigh) const; SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const; - SDValue combineExtract(SDLoc DL, EVT ElemVT, EVT VecVT, SDValue OrigOp, + SDValue combineExtract(const SDLoc &DL, EVT ElemVT, EVT VecVT, SDValue OrigOp, unsigned Index, DAGCombinerInfo &DCI, bool Force) const; - SDValue combineTruncateExtract(SDLoc DL, EVT TruncVT, SDValue Op, + SDValue combineTruncateExtract(const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const; + SDValue combineSIGN_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineMERGE(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineSTORE(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineEXTRACT_VECTOR_ELT(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineBSWAP(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineSHIFTROT(SDNode *N, DAGCombinerInfo &DCI) const; // If the last instruction before MBBI in MBB was some form of COMPARE, // try to replace it with a COMPARE AND BRANCH just before MBBI. @@ -514,40 +564,33 @@ private: MachineBasicBlock *Target) const; // Implement EmitInstrWithCustomInserter for individual operation types. 
- MachineBasicBlock *emitSelect(MachineInstr *MI, - MachineBasicBlock *BB) const; - MachineBasicBlock *emitCondStore(MachineInstr *MI, - MachineBasicBlock *BB, + MachineBasicBlock *emitSelect(MachineInstr &MI, MachineBasicBlock *BB) const; + MachineBasicBlock *emitCondStore(MachineInstr &MI, MachineBasicBlock *BB, unsigned StoreOpcode, unsigned STOCOpcode, bool Invert) const; - MachineBasicBlock *emitExt128(MachineInstr *MI, - MachineBasicBlock *MBB, + MachineBasicBlock *emitExt128(MachineInstr &MI, MachineBasicBlock *MBB, bool ClearEven, unsigned SubReg) const; - MachineBasicBlock *emitAtomicLoadBinary(MachineInstr *MI, + MachineBasicBlock *emitAtomicLoadBinary(MachineInstr &MI, MachineBasicBlock *BB, unsigned BinOpcode, unsigned BitSize, bool Invert = false) const; - MachineBasicBlock *emitAtomicLoadMinMax(MachineInstr *MI, + MachineBasicBlock *emitAtomicLoadMinMax(MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode, unsigned KeepOldMask, unsigned BitSize) const; - MachineBasicBlock *emitAtomicCmpSwapW(MachineInstr *MI, + MachineBasicBlock *emitAtomicCmpSwapW(MachineInstr &MI, MachineBasicBlock *BB) const; - MachineBasicBlock *emitMemMemWrapper(MachineInstr *MI, - MachineBasicBlock *BB, + MachineBasicBlock *emitMemMemWrapper(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode) const; - MachineBasicBlock *emitStringWrapper(MachineInstr *MI, - MachineBasicBlock *BB, + MachineBasicBlock *emitStringWrapper(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode) const; - MachineBasicBlock *emitTransactionBegin(MachineInstr *MI, + MachineBasicBlock *emitTransactionBegin(MachineInstr &MI, MachineBasicBlock *MBB, - unsigned Opcode, - bool NoFloat) const; - MachineBasicBlock *emitLoadAndTestCmp0(MachineInstr *MI, + unsigned Opcode, bool NoFloat) const; + MachineBasicBlock *emitLoadAndTestCmp0(MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const; - }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h b/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h index 5a1c874dfa36..2cb8aba1b322 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h @@ -29,7 +29,7 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI) { MachineFunction &MF = *MI->getParent()->getParent(); MachineFrameInfo *MFFrame = MF.getFrameInfo(); const MCInstrDesc &MCID = MI->getDesc(); - unsigned Flags = 0; + auto Flags = MachineMemOperand::MONone; if (MCID.mayLoad()) Flags |= MachineMemOperand::MOLoad; if (MCID.mayStore()) diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td index 0cb267290cc1..8b32047e08e3 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td @@ -37,6 +37,10 @@ let hasSideEffects = 0 in { def LER : UnaryRR <"le", 0x38, null_frag, FP32, FP32>; def LDR : UnaryRR <"ld", 0x28, null_frag, FP64, FP64>; def LXR : UnaryRRE<"lx", 0xB365, null_frag, FP128, FP128>; + + // For z13 we prefer LDR over LER to avoid partial register dependencies. + let isCodeGenOnly = 1 in + def LDR32 : UnaryRR<"ld", 0x28, null_frag, FP32, FP32>; } // Moves between two floating-point registers that also set the condition @@ -443,6 +447,13 @@ let Defs = [CC], CCValues = 0xF in { def CDB : CompareRXE<"cdb", 0xED19, z_fcmp, FP64, load, 8>; } +// Test Data Class. 
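+// Despite the address operand, these instructions do not access memory:
+// the computed second-operand address itself supplies the class mask
+// (operand 1 of z_tdc), which is tested against the value in $R1.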
+let Defs = [CC], CCValues = 0xC in { + def TCEB : TestRXE<"tceb", 0xED10, z_tdc, FP32>; + def TCDB : TestRXE<"tcdb", 0xED11, z_tdc, FP64>; + def TCXB : TestRXE<"tcxb", 0xED12, z_tdc, FP128>; +} + //===----------------------------------------------------------------------===// // Peepholes //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td index 01f4cdec05cb..973894d5c001 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td @@ -158,6 +158,17 @@ def getThreeOperandOpcode : InstrMapping { // //===----------------------------------------------------------------------===// +class InstI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<2, outs, ins, asmstr, pattern> { + field bits<16> Inst; + field bits<16> SoftFail = 0; + + bits<8> I1; + + let Inst{15-8} = op; + let Inst{7-0} = I1; +} + class InstRI<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<4, outs, ins, asmstr, pattern> { field bits<32> Inst; @@ -172,6 +183,24 @@ class InstRI<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern> let Inst{15-0} = I2; } +class InstRIEa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<16> I2; + bits<4> M3; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = 0; + let Inst{31-16} = I2; + let Inst{15-12} = M3; + let Inst{11-8} = 0; + let Inst{7-0} = op{7-0}; +} + class InstRIEb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; @@ -260,6 +289,24 @@ class InstRIL<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern> let Inst{31-0} = I2; } +class InstRIS<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<8> I2; + bits<4> M3; + bits<16> BD4; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = M3; + let Inst{31-16} = BD4; + let Inst{15-8} = I2; + let Inst{7-0} = op{7-0}; +} + class InstRR<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<2, outs, ins, asmstr, pattern> { field bits<16> Inst; @@ -320,6 +367,41 @@ class InstRRF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> let Inst{3-0} = R2; } +class InstRRFc<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + field bits<32> SoftFail = 0; + + bits<4> R1; + bits<4> R2; + bits<4> M3; + + let Inst{31-16} = op; + let Inst{15-12} = M3; + let Inst{11-8} = 0; + let Inst{7-4} = R1; + let Inst{3-0} = R2; +} + +class InstRRS<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<4> R2; + bits<4> M3; + bits<16> BD4; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = R2; + let Inst{31-16} = BD4; + let Inst{15-12} = M3; + let Inst{11-8} = 0; + let Inst{7-0} = op{7-0}; +} + class InstRX<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<4, 
outs, ins, asmstr, pattern> { field bits<32> Inst; @@ -919,6 +1001,10 @@ class InstVRX<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> // Compare: // Two input operands and an implicit CC output operand. // +// Test: +// Two input operands and an implicit CC output operand. The second +// input operand is an "address" operand used as a test class mask. +// // Ternary: // One register output operand and three input operands. // @@ -974,12 +1060,30 @@ class BranchUnaryRI<string mnemonic, bits<12> opcode, RegisterOperand cls> let DisableEncoding = "$R1src"; } -class LoadMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls> - : InstRSY<opcode, (outs cls:$R1, cls:$R3), (ins bdaddr20only:$BD2), +class LoadMultipleRS<string mnemonic, bits<8> opcode, RegisterOperand cls, + AddressingMode mode = bdaddr12only> + : InstRS<opcode, (outs cls:$R1, cls:$R3), (ins mode:$BD2), + mnemonic#"\t$R1, $R3, $BD2", []> { + let mayLoad = 1; +} + +class LoadMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls, + AddressingMode mode = bdaddr20only> + : InstRSY<opcode, (outs cls:$R1, cls:$R3), (ins mode:$BD2), mnemonic#"\t$R1, $R3, $BD2", []> { let mayLoad = 1; } +multiclass LoadMultipleRSPair<string mnemonic, bits<8> rsOpcode, + bits<16> rsyOpcode, RegisterOperand cls> { + let DispKey = mnemonic ## #cls in { + let DispSize = "12" in + def "" : LoadMultipleRS<mnemonic, rsOpcode, cls, bdaddr12pair>; + let DispSize = "20" in + def Y : LoadMultipleRSY<mnemonic#"y", rsyOpcode, cls, bdaddr20pair>; + } +} + class LoadMultipleVRSa<string mnemonic, bits<16> opcode> : InstVRSa<opcode, (outs VR128:$V1, VR128:$V3), (ins bdaddr12only:$BD2), mnemonic#"\t$V1, $V3, $BD2", []> { @@ -1055,12 +1159,30 @@ class StoreLengthVRSb<string mnemonic, bits<16> opcode, let AccessBytes = bytes; } -class StoreMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls> - : InstRSY<opcode, (outs), (ins cls:$R1, cls:$R3, bdaddr20only:$BD2), +class StoreMultipleRS<string mnemonic, bits<8> opcode, RegisterOperand cls, + AddressingMode mode = bdaddr12only> + : InstRS<opcode, (outs), (ins cls:$R1, cls:$R3, mode:$BD2), + mnemonic#"\t$R1, $R3, $BD2", []> { + let mayStore = 1; +} + +class StoreMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls, + AddressingMode mode = bdaddr20only> + : InstRSY<opcode, (outs), (ins cls:$R1, cls:$R3, mode:$BD2), mnemonic#"\t$R1, $R3, $BD2", []> { let mayStore = 1; } +multiclass StoreMultipleRSPair<string mnemonic, bits<8> rsOpcode, + bits<16> rsyOpcode, RegisterOperand cls> { + let DispKey = mnemonic ## #cls in { + let DispSize = "12" in + def "" : StoreMultipleRS<mnemonic, rsOpcode, cls, bdaddr12pair>; + let DispSize = "20" in + def Y : StoreMultipleRSY<mnemonic#"y", rsyOpcode, cls, bdaddr20pair>; + } +} + class StoreMultipleVRSa<string mnemonic, bits<16> opcode> : InstVRSa<opcode, (outs), (ins VR128:$V1, VR128:$V3, bdaddr12only:$BD2), mnemonic#"\t$V1, $V3, $BD2", []> { @@ -1186,6 +1308,15 @@ class CondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, let R4 = 0; } +class CondUnaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls, + Immediate imm> + : InstRIEd<opcode, (outs cls:$R1), + (ins imm:$I2, cond4:$valid, cond4:$R3), + mnemonic#"$R3\t$R1, $I2", []>, + Requires<[FeatureLoadStoreOnCond2]> { + let CCMaskLast = 1; +} + // Like CondUnaryRRF, but used for the raw assembly form. The condition-code // mask is the third operand rather than being part of the mnemonic. 
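// For the RIE-d variants below this means, roughly: the codegen form
// prints the mask as a mnemonic suffix, e.g. "lochie %r2, 0", while the
// asm form takes it as an explicit operand, e.g. "lochi %r2, 0, 8".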
class AsmCondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, @@ -1198,6 +1329,16 @@ class AsmCondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, let R4 = 0; } +class AsmCondUnaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls, + Immediate imm> + : InstRIEd<opcode, (outs cls:$R1), + (ins cls:$R1src, imm:$I2, imm32zx4:$R3), + mnemonic#"\t$R1, $I2, $R3", []>, + Requires<[FeatureLoadStoreOnCond2]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + // Like CondUnaryRRF, but with a fixed CC mask. class FixedCondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, RegisterOperand cls2, bits<4> ccmask> @@ -1210,6 +1351,17 @@ class FixedCondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, let R4 = 0; } +class FixedCondUnaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls, + Immediate imm, bits<4> ccmask> + : InstRIEd<opcode, (outs cls:$R1), + (ins cls:$R1src, imm:$I2), + mnemonic#"\t$R1, $I2", []>, + Requires<[FeatureLoadStoreOnCond2]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let R3 = ccmask; +} + class UnaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> : InstRI<opcode, (outs cls:$R1), (ins imm:$I2), @@ -1391,9 +1543,9 @@ class BinaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator, class BinaryRRF<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> - : InstRRF<opcode, (outs cls1:$R1), (ins cls1:$R3, cls2:$R2), + : InstRRF<opcode, (outs cls1:$R1), (ins cls1:$R2, cls2:$R3), mnemonic#"r\t$R1, $R3, $R2", - [(set cls1:$R1, (operator cls1:$R3, cls2:$R2))]> { + [(set cls1:$R1, (operator cls1:$R2, cls2:$R3))]> { let OpKey = mnemonic ## cls1; let OpType = "reg"; let R4 = 0; @@ -1874,6 +2026,14 @@ class CompareVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator, let M5 = 0; } +class TestRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls> + : InstRXE<opcode, (outs), (ins cls:$R1, bdxaddr12only:$XBD2), + mnemonic#"\t$R1, $XBD2", + [(operator cls:$R1, bdxaddr12only:$XBD2)]> { + let M3 = 0; +} + class TernaryRRD<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls> : InstRRD<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, cls:$R2), @@ -1885,6 +2045,40 @@ class TernaryRRD<string mnemonic, bits<16> opcode, let DisableEncoding = "$R1src"; } +class TernaryRS<string mnemonic, bits<8> opcode, RegisterOperand cls, + bits<5> bytes, AddressingMode mode = bdaddr12only> + : InstRS<opcode, (outs cls:$R1), + (ins cls:$R1src, imm32zx4:$R3, mode:$BD2), + mnemonic#"\t$R1, $R3, $BD2", []> { + + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let mayLoad = 1; + let AccessBytes = bytes; +} + +class TernaryRSY<string mnemonic, bits<16> opcode, RegisterOperand cls, + bits<5> bytes, AddressingMode mode = bdaddr20only> + : InstRSY<opcode, (outs cls:$R1), + (ins cls:$R1src, imm32zx4:$R3, mode:$BD2), + mnemonic#"\t$R1, $R3, $BD2", []> { + + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let mayLoad = 1; + let AccessBytes = bytes; +} + +multiclass TernaryRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode, + RegisterOperand cls, bits<5> bytes> { + let DispKey = mnemonic ## #cls in { + let DispSize = "12" in + def "" : TernaryRS<mnemonic, rsOpcode, cls, bytes, bdaddr12pair>; + let DispSize = "20" in + def Y : TernaryRSY<mnemonic#"y", rsyOpcode, 
cls, bytes, bdaddr20pair>; + } +} + class TernaryRXF<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load, bits<5> bytes> : InstRXF<opcode, (outs cls:$R1), diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp index e6b5fc8e6235..4084e93e5acb 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -15,6 +15,7 @@ #include "SystemZInstrBuilder.h" #include "SystemZTargetMachine.h" #include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineRegisterInfo.h" using namespace llvm; @@ -54,7 +55,7 @@ void SystemZInstrInfo::splitMove(MachineBasicBlock::iterator MI, // Get two load or store instructions. Use the original instruction for one // of them (arbitrarily the second here) and create a clone for the other. - MachineInstr *EarlierMI = MF.CloneMachineInstr(MI); + MachineInstr *EarlierMI = MF.CloneMachineInstr(&*MI); MBB->insert(MI, EarlierMI); // Set up the two 64-bit registers. @@ -69,8 +70,8 @@ void SystemZInstrInfo::splitMove(MachineBasicBlock::iterator MI, MachineOperand &LowOffsetOp = MI->getOperand(2); LowOffsetOp.setImm(LowOffsetOp.getImm() + 8); - // Clear the kill flags for the base and index registers in the first - // instruction. + // Clear the kill flags for the base and index registers in the first + // instruction. EarlierMI->getOperand(1).setIsKill(false); EarlierMI->getOperand(3).setIsKill(false); @@ -105,59 +106,89 @@ void SystemZInstrInfo::splitAdjDynAlloc(MachineBasicBlock::iterator MI) const { // and HighOpcode takes an unsigned 32-bit operand. In those cases, // MI has the same kind of operand as LowOpcode, so needs to be converted // if HighOpcode is used. -void SystemZInstrInfo::expandRIPseudo(MachineInstr *MI, unsigned LowOpcode, +void SystemZInstrInfo::expandRIPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned HighOpcode, bool ConvertHigh) const { - unsigned Reg = MI->getOperand(0).getReg(); + unsigned Reg = MI.getOperand(0).getReg(); bool IsHigh = isHighReg(Reg); - MI->setDesc(get(IsHigh ? HighOpcode : LowOpcode)); + MI.setDesc(get(IsHigh ? HighOpcode : LowOpcode)); if (IsHigh && ConvertHigh) - MI->getOperand(1).setImm(uint32_t(MI->getOperand(1).getImm())); + MI.getOperand(1).setImm(uint32_t(MI.getOperand(1).getImm())); } // MI is a three-operand RIE-style pseudo instruction. Replace it with // LowOpcodeK if the registers are both low GR32s, otherwise use a move // followed by HighOpcode or LowOpcode, depending on whether the target // is a high or low GR32. -void SystemZInstrInfo::expandRIEPseudo(MachineInstr *MI, unsigned LowOpcode, +void SystemZInstrInfo::expandRIEPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned LowOpcodeK, unsigned HighOpcode) const { - unsigned DestReg = MI->getOperand(0).getReg(); - unsigned SrcReg = MI->getOperand(1).getReg(); + unsigned DestReg = MI.getOperand(0).getReg(); + unsigned SrcReg = MI.getOperand(1).getReg(); bool DestIsHigh = isHighReg(DestReg); bool SrcIsHigh = isHighReg(SrcReg); if (!DestIsHigh && !SrcIsHigh) - MI->setDesc(get(LowOpcodeK)); + MI.setDesc(get(LowOpcodeK)); else { - emitGRX32Move(*MI->getParent(), MI, MI->getDebugLoc(), - DestReg, SrcReg, SystemZ::LR, 32, - MI->getOperand(1).isKill()); - MI->setDesc(get(DestIsHigh ? 
HighOpcode : LowOpcode)); - MI->getOperand(1).setReg(DestReg); - MI->tieOperands(0, 1); + emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(), DestReg, SrcReg, + SystemZ::LR, 32, MI.getOperand(1).isKill()); + MI.setDesc(get(DestIsHigh ? HighOpcode : LowOpcode)); + MI.getOperand(1).setReg(DestReg); + MI.tieOperands(0, 1); } } // MI is an RXY-style pseudo instruction. Replace it with LowOpcode // if the first operand is a low GR32 and HighOpcode if the first operand // is a high GR32. -void SystemZInstrInfo::expandRXYPseudo(MachineInstr *MI, unsigned LowOpcode, +void SystemZInstrInfo::expandRXYPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned HighOpcode) const { - unsigned Reg = MI->getOperand(0).getReg(); + unsigned Reg = MI.getOperand(0).getReg(); unsigned Opcode = getOpcodeForOffset(isHighReg(Reg) ? HighOpcode : LowOpcode, - MI->getOperand(2).getImm()); - MI->setDesc(get(Opcode)); + MI.getOperand(2).getImm()); + MI.setDesc(get(Opcode)); } // MI is an RR-style pseudo instruction that zero-extends the low Size bits // of one GRX32 into another. Replace it with LowOpcode if both operands // are low registers, otherwise use RISB[LH]G. -void SystemZInstrInfo::expandZExtPseudo(MachineInstr *MI, unsigned LowOpcode, +void SystemZInstrInfo::expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned Size) const { - emitGRX32Move(*MI->getParent(), MI, MI->getDebugLoc(), - MI->getOperand(0).getReg(), MI->getOperand(1).getReg(), - LowOpcode, Size, MI->getOperand(1).isKill()); - MI->eraseFromParent(); + emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(), + MI.getOperand(0).getReg(), MI.getOperand(1).getReg(), LowOpcode, + Size, MI.getOperand(1).isKill()); + MI.eraseFromParent(); +} + +void SystemZInstrInfo::expandLoadStackGuard(MachineInstr *MI) const { + MachineBasicBlock *MBB = MI->getParent(); + MachineFunction &MF = *MBB->getParent(); + const unsigned Reg = MI->getOperand(0).getReg(); + + // Conveniently, all 4 instructions are cloned from LOAD_STACK_GUARD, + // so they already have operand 0 set to reg. + + // ear <reg>, %a0 + MachineInstr *Ear1MI = MF.CloneMachineInstr(MI); + MBB->insert(MI, Ear1MI); + Ear1MI->setDesc(get(SystemZ::EAR)); + MachineInstrBuilder(MF, Ear1MI).addImm(0); + + // sllg <reg>, <reg>, 32 + MachineInstr *SllgMI = MF.CloneMachineInstr(MI); + MBB->insert(MI, SllgMI); + SllgMI->setDesc(get(SystemZ::SLLG)); + MachineInstrBuilder(MF, SllgMI).addReg(Reg).addReg(0).addImm(32); + + // ear <reg>, %a1 + MachineInstr *Ear2MI = MF.CloneMachineInstr(MI); + MBB->insert(MI, Ear2MI); + Ear2MI->setDesc(get(SystemZ::EAR)); + MachineInstrBuilder(MF, Ear2MI).addImm(1); + + // lg <reg>, 40(<reg>) + MI->setDesc(get(SystemZ::LG)); + MachineInstrBuilder(MF, MI).addReg(Reg).addImm(40).addReg(0); } // Emit a zero-extending move from 32-bit GPR SrcReg to 32-bit GPR @@ -167,7 +198,7 @@ void SystemZInstrInfo::expandZExtPseudo(MachineInstr *MI, unsigned LowOpcode, // KillSrc is true if this move is the last use of SrcReg. void SystemZInstrInfo::emitGRX32Move(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - DebugLoc DL, unsigned DestReg, + const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, unsigned LowLowOpcode, unsigned Size, bool KillSrc) const { unsigned Opcode; @@ -196,45 +227,41 @@ void SystemZInstrInfo::emitGRX32Move(MachineBasicBlock &MBB, // Return 0 otherwise. // // Flag is SimpleBDXLoad for loads and SimpleBDXStore for stores. 
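// For example, a frame reload of the form
//   %r2 = L <fi#3>, 0, <noreg>
// (frame-index base, zero displacement, zero index) sets FrameIndex to 3
// and returns %r2; any other shape returns 0.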
-static int isSimpleMove(const MachineInstr *MI, int &FrameIndex, +static int isSimpleMove(const MachineInstr &MI, int &FrameIndex, unsigned Flag) { - const MCInstrDesc &MCID = MI->getDesc(); - if ((MCID.TSFlags & Flag) && - MI->getOperand(1).isFI() && - MI->getOperand(2).getImm() == 0 && - MI->getOperand(3).getReg() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); + const MCInstrDesc &MCID = MI.getDesc(); + if ((MCID.TSFlags & Flag) && MI.getOperand(1).isFI() && + MI.getOperand(2).getImm() == 0 && MI.getOperand(3).getReg() == 0) { + FrameIndex = MI.getOperand(1).getIndex(); + return MI.getOperand(0).getReg(); } return 0; } -unsigned SystemZInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, +unsigned SystemZInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const { return isSimpleMove(MI, FrameIndex, SystemZII::SimpleBDXLoad); } -unsigned SystemZInstrInfo::isStoreToStackSlot(const MachineInstr *MI, +unsigned SystemZInstrInfo::isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const { return isSimpleMove(MI, FrameIndex, SystemZII::SimpleBDXStore); } -bool SystemZInstrInfo::isStackSlotCopy(const MachineInstr *MI, +bool SystemZInstrInfo::isStackSlotCopy(const MachineInstr &MI, int &DestFrameIndex, int &SrcFrameIndex) const { // Check for MVC 0(Length,FI1),0(FI2) - const MachineFrameInfo *MFI = MI->getParent()->getParent()->getFrameInfo(); - if (MI->getOpcode() != SystemZ::MVC || - !MI->getOperand(0).isFI() || - MI->getOperand(1).getImm() != 0 || - !MI->getOperand(3).isFI() || - MI->getOperand(4).getImm() != 0) + const MachineFrameInfo *MFI = MI.getParent()->getParent()->getFrameInfo(); + if (MI.getOpcode() != SystemZ::MVC || !MI.getOperand(0).isFI() || + MI.getOperand(1).getImm() != 0 || !MI.getOperand(3).isFI() || + MI.getOperand(4).getImm() != 0) return false; // Check that Length covers the full slots. - int64_t Length = MI->getOperand(2).getImm(); - unsigned FI1 = MI->getOperand(0).getIndex(); - unsigned FI2 = MI->getOperand(3).getIndex(); + int64_t Length = MI.getOperand(2).getImm(); + unsigned FI1 = MI.getOperand(0).getIndex(); + unsigned FI2 = MI.getOperand(3).getIndex(); if (MFI->getObjectSize(FI1) != Length || MFI->getObjectSize(FI2) != Length) return false; @@ -244,7 +271,7 @@ bool SystemZInstrInfo::isStackSlotCopy(const MachineInstr *MI, return true; } -bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, +bool SystemZInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, @@ -261,7 +288,7 @@ bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, // Working from the bottom, when we see a non-terminator instruction, we're // done. - if (!isUnpredicatedTerminator(I)) + if (!isUnpredicatedTerminator(*I)) break; // A terminator that isn't a branch can't easily be handled by this @@ -270,7 +297,7 @@ bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, return true; // Can't handle indirect branches. - SystemZII::Branch Branch(getBranchInfo(I)); + SystemZII::Branch Branch(getBranchInfo(*I)); if (!Branch.Target->isMBB()) return true; @@ -347,7 +374,7 @@ unsigned SystemZInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { continue; if (!I->isBranch()) break; - if (!getBranchInfo(I).Target->isMBB()) + if (!getBranchInfo(*I).Target->isMBB()) break; // Remove the branch. 
I->eraseFromParent(); @@ -365,11 +392,11 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { return false; } -unsigned -SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - ArrayRef<MachineOperand> Cond, - DebugLoc DL) const { +unsigned SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + ArrayRef<MachineOperand> Cond, + const DebugLoc &DL) const { // In this function we output 32-bit branches, which should always // have enough range. They can be shortened and relaxed by later code // in the pipeline, if desired. @@ -402,17 +429,16 @@ SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, return Count; } -bool SystemZInstrInfo::analyzeCompare(const MachineInstr *MI, - unsigned &SrcReg, unsigned &SrcReg2, - int &Mask, int &Value) const { - assert(MI->isCompare() && "Caller should have checked for a comparison"); +bool SystemZInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, + unsigned &SrcReg2, int &Mask, + int &Value) const { + assert(MI.isCompare() && "Caller should have checked for a comparison"); - if (MI->getNumExplicitOperands() == 2 && - MI->getOperand(0).isReg() && - MI->getOperand(1).isImm()) { - SrcReg = MI->getOperand(0).getReg(); + if (MI.getNumExplicitOperands() == 2 && MI.getOperand(0).isReg() && + MI.getOperand(1).isImm()) { + SrcReg = MI.getOperand(0).getReg(); SrcReg2 = 0; - Value = MI->getOperand(1).getImm(); + Value = MI.getOperand(1).getImm(); Mask = ~0; return true; } @@ -445,7 +471,7 @@ static void eraseIfDead(MachineInstr *MI, const MachineRegisterInfo *MRI) { // the result of an IPM sequence whose input CC survives until Compare, // and whether Compare is therefore redundant. Delete it and return // true if so. 
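// The shape being matched is roughly the following (exact shifts vary;
// see the IPM, RLL and LGFR handling below):
//
//   Reg = IPM            ; CC ends up in bits 28-29 of Reg
//   Reg = RLL Reg, ...   ; move the CC field into the low bits
//   Reg = LGFR Reg       ; optional sign extension to 64 bits
//   C(G)HI Reg, 0        ; the Compare under consideration
//
// If nothing clobbers CC between the IPM and the Compare, the Compare
// adds no information and can be deleted along with any dead helpers.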
-static bool removeIPMBasedCompare(MachineInstr *Compare, unsigned SrcReg, +static bool removeIPMBasedCompare(MachineInstr &Compare, unsigned SrcReg, const MachineRegisterInfo *MRI, const TargetRegisterInfo *TRI) { MachineInstr *LGFR = nullptr; @@ -466,16 +492,16 @@ static bool removeIPMBasedCompare(MachineInstr *Compare, unsigned SrcReg, return false; // Check that there are no assignments to CC between the IPM and Compare, - if (IPM->getParent() != Compare->getParent()) + if (IPM->getParent() != Compare.getParent()) return false; - MachineBasicBlock::iterator MBBI = IPM, MBBE = Compare; + MachineBasicBlock::iterator MBBI = IPM, MBBE = Compare.getIterator(); for (++MBBI; MBBI != MBBE; ++MBBI) { - MachineInstr *MI = MBBI; - if (MI->modifiesRegister(SystemZ::CC, TRI)) + MachineInstr &MI = *MBBI; + if (MI.modifiesRegister(SystemZ::CC, TRI)) return false; } - Compare->eraseFromParent(); + Compare.eraseFromParent(); if (LGFR) eraseIfDead(LGFR, MRI); eraseIfDead(RLL, MRI); @@ -485,13 +511,11 @@ static bool removeIPMBasedCompare(MachineInstr *Compare, unsigned SrcReg, return true; } -bool -SystemZInstrInfo::optimizeCompareInstr(MachineInstr *Compare, - unsigned SrcReg, unsigned SrcReg2, - int Mask, int Value, - const MachineRegisterInfo *MRI) const { +bool SystemZInstrInfo::optimizeCompareInstr( + MachineInstr &Compare, unsigned SrcReg, unsigned SrcReg2, int Mask, + int Value, const MachineRegisterInfo *MRI) const { assert(!SrcReg2 && "Only optimizing constant comparisons so far"); - bool IsLogical = (Compare->getDesc().TSFlags & SystemZII::IsLogical) != 0; + bool IsLogical = (Compare.getDesc().TSFlags & SystemZII::IsLogical) != 0; return Value == 0 && !IsLogical && removeIPMBasedCompare(Compare, SrcReg, MRI, &RI); } @@ -506,15 +530,43 @@ static unsigned getConditionalMove(unsigned Opcode) { } } -bool SystemZInstrInfo::isPredicable(MachineInstr *MI) const { - unsigned Opcode = MI->getOpcode(); - return STI.hasLoadStoreOnCond() && getConditionalMove(Opcode); +static unsigned getConditionalLoadImmediate(unsigned Opcode) { + switch (Opcode) { + case SystemZ::LHI: return SystemZ::LOCHI; + case SystemZ::LGHI: return SystemZ::LOCGHI; + default: return 0; + } +} + +bool SystemZInstrInfo::isPredicable(MachineInstr &MI) const { + unsigned Opcode = MI.getOpcode(); + if (STI.hasLoadStoreOnCond() && getConditionalMove(Opcode)) + return true; + if (STI.hasLoadStoreOnCond2() && getConditionalLoadImmediate(Opcode)) + return true; + if (Opcode == SystemZ::Return || + Opcode == SystemZ::Trap || + Opcode == SystemZ::CallJG || + Opcode == SystemZ::CallBR) + return true; + return false; } bool SystemZInstrInfo:: isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, BranchProbability Probability) const { + // Avoid using conditional returns at the end of a loop (since then + // we'd need to emit an unconditional branch to the beginning anyway, + // making the loop body longer). This doesn't apply for low-probability + // loops (eg. compare-and-swap retry), so just decide based on branch + // probability instead of looping structure. + // However, since Compare and Trap instructions cost the same as a regular + // Compare instruction, we should allow the if conversion to convert this + // into a Conditional Compare regardless of the branch probability. + if (MBB.getLastNonDebugInstr()->getOpcode() != SystemZ::Trap && + MBB.succ_empty() && Probability < BranchProbability(1, 8)) + return false; // For now only convert single instructions. 
return NumCycles == 1; } @@ -530,27 +582,82 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB, } bool SystemZInstrInfo:: -PredicateInstruction(MachineInstr *MI, ArrayRef<MachineOperand> Pred) const { +isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, + BranchProbability Probability) const { + // For now only duplicate single instructions. + return NumCycles == 1; +} + +bool SystemZInstrInfo::PredicateInstruction( + MachineInstr &MI, ArrayRef<MachineOperand> Pred) const { assert(Pred.size() == 2 && "Invalid condition"); unsigned CCValid = Pred[0].getImm(); unsigned CCMask = Pred[1].getImm(); assert(CCMask > 0 && CCMask < 15 && "Invalid predicate"); - unsigned Opcode = MI->getOpcode(); + unsigned Opcode = MI.getOpcode(); if (STI.hasLoadStoreOnCond()) { if (unsigned CondOpcode = getConditionalMove(Opcode)) { - MI->setDesc(get(CondOpcode)); - MachineInstrBuilder(*MI->getParent()->getParent(), MI) - .addImm(CCValid).addImm(CCMask) - .addReg(SystemZ::CC, RegState::Implicit); + MI.setDesc(get(CondOpcode)); + MachineInstrBuilder(*MI.getParent()->getParent(), MI) + .addImm(CCValid) + .addImm(CCMask) + .addReg(SystemZ::CC, RegState::Implicit); return true; } } + if (STI.hasLoadStoreOnCond2()) { + if (unsigned CondOpcode = getConditionalLoadImmediate(Opcode)) { + MI.setDesc(get(CondOpcode)); + MachineInstrBuilder(*MI.getParent()->getParent(), MI) + .addImm(CCValid) + .addImm(CCMask) + .addReg(SystemZ::CC, RegState::Implicit); + return true; + } + } + if (Opcode == SystemZ::Trap) { + MI.setDesc(get(SystemZ::CondTrap)); + MachineInstrBuilder(*MI.getParent()->getParent(), MI) + .addImm(CCValid).addImm(CCMask) + .addReg(SystemZ::CC, RegState::Implicit); + return true; + } + if (Opcode == SystemZ::Return) { + MI.setDesc(get(SystemZ::CondReturn)); + MachineInstrBuilder(*MI.getParent()->getParent(), MI) + .addImm(CCValid).addImm(CCMask) + .addReg(SystemZ::CC, RegState::Implicit); + return true; + } + if (Opcode == SystemZ::CallJG) { + MachineOperand FirstOp = MI.getOperand(0); + const uint32_t *RegMask = MI.getOperand(1).getRegMask(); + MI.RemoveOperand(1); + MI.RemoveOperand(0); + MI.setDesc(get(SystemZ::CallBRCL)); + MachineInstrBuilder(*MI.getParent()->getParent(), MI) + .addImm(CCValid).addImm(CCMask) + .addOperand(FirstOp) + .addRegMask(RegMask) + .addReg(SystemZ::CC, RegState::Implicit); + return true; + } + if (Opcode == SystemZ::CallBR) { + const uint32_t *RegMask = MI.getOperand(0).getRegMask(); + MI.RemoveOperand(0); + MI.setDesc(get(SystemZ::CallBCR)); + MachineInstrBuilder(*MI.getParent()->getParent(), MI) + .addImm(CCValid).addImm(CCMask) + .addRegMask(RegMask) + .addReg(SystemZ::CC, RegState::Implicit); + return true; + } return false; } void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - DebugLoc DL, unsigned DestReg, + const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { // Split 128-bit GPR moves into two 64-bit moves. This handles ADDR128 too. if (SystemZ::GR128BitRegClass.contains(DestReg, SrcReg)) { @@ -571,7 +678,8 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (SystemZ::GR64BitRegClass.contains(DestReg, SrcReg)) Opcode = SystemZ::LGR; else if (SystemZ::FP32BitRegClass.contains(DestReg, SrcReg)) - Opcode = SystemZ::LER; + // For z13 we prefer LDR over LER to avoid partial register dependencies. + Opcode = STI.hasVector() ? 
SystemZ::LDR32 : SystemZ::LER; else if (SystemZ::FP64BitRegClass.contains(DestReg, SrcReg)) Opcode = SystemZ::LDR; else if (SystemZ::FP128BitRegClass.contains(DestReg, SrcReg)) @@ -654,6 +762,14 @@ static LogicOp interpretAndImmediate(unsigned Opcode) { } } +static void transferDeadCC(MachineInstr *OldMI, MachineInstr *NewMI) { + if (OldMI->registerDefIsDead(SystemZ::CC)) { + MachineOperand *CCDef = NewMI->findRegisterDefOperand(SystemZ::CC); + if (CCDef != nullptr) + CCDef->setIsDead(true); + } +} + // Used to return from convertToThreeAddress after replacing two-address // instruction OldMI with three-address instruction NewMI. static MachineInstr *finishConvertToThreeAddress(MachineInstr *OldMI, @@ -664,31 +780,29 @@ static MachineInstr *finishConvertToThreeAddress(MachineInstr *OldMI, for (unsigned I = 1; I < NumOps; ++I) { MachineOperand &Op = OldMI->getOperand(I); if (Op.isReg() && Op.isKill()) - LV->replaceKillInstruction(Op.getReg(), OldMI, NewMI); + LV->replaceKillInstruction(Op.getReg(), *OldMI, *NewMI); } } + transferDeadCC(OldMI, NewMI); return NewMI; } -MachineInstr * -SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, - MachineBasicBlock::iterator &MBBI, - LiveVariables *LV) const { - MachineInstr *MI = MBBI; - MachineBasicBlock *MBB = MI->getParent(); +MachineInstr *SystemZInstrInfo::convertToThreeAddress( + MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const { + MachineBasicBlock *MBB = MI.getParent(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); - unsigned Opcode = MI->getOpcode(); - unsigned NumOps = MI->getNumOperands(); + unsigned Opcode = MI.getOpcode(); + unsigned NumOps = MI.getNumOperands(); // Try to convert something like SLL into SLLK, if supported. // We prefer to keep the two-operand form where possible both // because it tends to be shorter and because some instructions // have memory forms that can be used during spilling. if (STI.hasDistinctOps()) { - MachineOperand &Dest = MI->getOperand(0); - MachineOperand &Src = MI->getOperand(1); + MachineOperand &Dest = MI.getOperand(0); + MachineOperand &Src = MI.getOperand(1); unsigned DestReg = Dest.getReg(); unsigned SrcReg = Src.getReg(); // AHIMux is only really a three-operand instruction when both operands @@ -707,23 +821,23 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, // Create three address instruction without adding the implicit // operands. Those will instead be copied over from the original // instruction by the loop below. - MachineInstrBuilder MIB(*MF, - MF->CreateMachineInstr(get(ThreeOperandOpcode), - MI->getDebugLoc(), /*NoImplicit=*/true)); + MachineInstrBuilder MIB( + *MF, MF->CreateMachineInstr(get(ThreeOperandOpcode), MI.getDebugLoc(), + /*NoImplicit=*/true)); MIB.addOperand(Dest); // Keep the kill state, but drop the tied flag. MIB.addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg()); // Keep the remaining operands as-is. for (unsigned I = 2; I < NumOps; ++I) - MIB.addOperand(MI->getOperand(I)); + MIB.addOperand(MI.getOperand(I)); MBB->insert(MI, MIB); - return finishConvertToThreeAddress(MI, MIB, LV); + return finishConvertToThreeAddress(&MI, MIB, LV); } } // Try to convert an AND into an RISBG-type instruction. if (LogicOp And = interpretAndImmediate(Opcode)) { - uint64_t Imm = MI->getOperand(2).getImm() << And.ImmLSB; + uint64_t Imm = MI.getOperand(2).getImm() << And.ImmLSB; // AND IMMEDIATE leaves the other bits of the register unchanged. 
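    // For instance, with RegSize = 64, ImmSize = 32 and ImmLSB = 0 (a
    // 64-bit AND whose immediate covers only the low word), the line
    // below ORs in allOnes(64) & ~(allOnes(32) << 0), i.e.
    // 0xFFFFFFFF00000000, so the untouched high bits count as ones when
    // deriving the RISBG-style mask.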
Imm |= allOnes(And.RegSize) & ~(allOnes(And.ImmSize) << And.ImmLSB); unsigned Start, End; @@ -739,36 +853,55 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, Start &= 31; End &= 31; } - MachineOperand &Dest = MI->getOperand(0); - MachineOperand &Src = MI->getOperand(1); + MachineOperand &Dest = MI.getOperand(0); + MachineOperand &Src = MI.getOperand(1); MachineInstrBuilder MIB = - BuildMI(*MBB, MI, MI->getDebugLoc(), get(NewOpcode)) - .addOperand(Dest).addReg(0) - .addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg()) - .addImm(Start).addImm(End + 128).addImm(0); - return finishConvertToThreeAddress(MI, MIB, LV); + BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpcode)) + .addOperand(Dest) + .addReg(0) + .addReg(Src.getReg(), getKillRegState(Src.isKill()), + Src.getSubReg()) + .addImm(Start) + .addImm(End + 128) + .addImm(0); + return finishConvertToThreeAddress(&MI, MIB, LV); } } return nullptr; } MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( - MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, - MachineBasicBlock::iterator InsertPt, int FrameIndex) const { + MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, int FrameIndex, + LiveIntervals *LIS) const { + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); const MachineFrameInfo *MFI = MF.getFrameInfo(); unsigned Size = MFI->getObjectSize(FrameIndex); - unsigned Opcode = MI->getOpcode(); + unsigned Opcode = MI.getOpcode(); if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { - if ((Opcode == SystemZ::LA || Opcode == SystemZ::LAY) && - isInt<8>(MI->getOperand(2).getImm()) && - !MI->getOperand(3).getReg()) { - // LA(Y) %reg, CONST(%reg) -> AGSI %mem, CONST - return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(), - get(SystemZ::AGSI)) - .addFrameIndex(FrameIndex) - .addImm(0) - .addImm(MI->getOperand(2).getImm()); + if (LIS != nullptr && (Opcode == SystemZ::LA || Opcode == SystemZ::LAY) && + isInt<8>(MI.getOperand(2).getImm()) && !MI.getOperand(3).getReg()) { + + // Check CC liveness, since new instruction introduces a dead + // def of CC. + MCRegUnitIterator CCUnit(SystemZ::CC, TRI); + LiveRange &CCLiveRange = LIS->getRegUnit(*CCUnit); + ++CCUnit; + assert (!CCUnit.isValid() && "CC only has one reg unit."); + SlotIndex MISlot = + LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot(); + if (!CCLiveRange.liveAt(MISlot)) { + // LA(Y) %reg, CONST(%reg) -> AGSI %mem, CONST + MachineInstr *BuiltMI = BuildMI(*InsertPt->getParent(), InsertPt, + MI.getDebugLoc(), get(SystemZ::AGSI)) + .addFrameIndex(FrameIndex) + .addImm(0) + .addImm(MI.getOperand(2).getImm()); + BuiltMI->findRegisterDefOperand(SystemZ::CC)->setIsDead(true); + CCLiveRange.createDeadDef(MISlot, LIS->getVNInfoAllocator()); + return BuiltMI; + } } return nullptr; } @@ -778,20 +911,23 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( return nullptr; unsigned OpNum = Ops[0]; - assert(Size == MF.getRegInfo() - .getRegClass(MI->getOperand(OpNum).getReg())->getSize() && + assert(Size == + MF.getRegInfo() + .getRegClass(MI.getOperand(OpNum).getReg()) + ->getSize() && "Invalid size combination"); - if ((Opcode == SystemZ::AHI || Opcode == SystemZ::AGHI) && - OpNum == 0 && - isInt<8>(MI->getOperand(2).getImm())) { + if ((Opcode == SystemZ::AHI || Opcode == SystemZ::AGHI) && OpNum == 0 && + isInt<8>(MI.getOperand(2).getImm())) { // A(G)HI %reg, CONST -> A(G)SI %mem, CONST Opcode = (Opcode == SystemZ::AHI ? 
SystemZ::ASI : SystemZ::AGSI); - return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(), - get(Opcode)) - .addFrameIndex(FrameIndex) - .addImm(0) - .addImm(MI->getOperand(2).getImm()); + MachineInstr *BuiltMI = + BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(), get(Opcode)) + .addFrameIndex(FrameIndex) + .addImm(0) + .addImm(MI.getOperand(2).getImm()); + transferDeadCC(&MI, BuiltMI); + return BuiltMI; } if (Opcode == SystemZ::LGDR || Opcode == SystemZ::LDGR) { @@ -801,9 +937,9 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( // source register instead. if (OpNum == 0) { unsigned StoreOpcode = Op1IsGPR ? SystemZ::STG : SystemZ::STD; - return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(), + return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(), get(StoreOpcode)) - .addOperand(MI->getOperand(1)) + .addOperand(MI.getOperand(1)) .addFrameIndex(FrameIndex) .addImm(0) .addReg(0); @@ -812,8 +948,8 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( // destination register instead. if (OpNum == 1) { unsigned LoadOpcode = Op0IsGPR ? SystemZ::LG : SystemZ::LD; - unsigned Dest = MI->getOperand(0).getReg(); - return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(), + unsigned Dest = MI.getOperand(0).getReg(); + return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(), get(LoadOpcode), Dest) .addFrameIndex(FrameIndex) .addImm(0) @@ -834,26 +970,26 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( // might be equal. We don't worry about that case here, because spill slot // coloring happens later, and because we have special code to remove // MVCs that turn out to be redundant. - if (OpNum == 0 && MI->hasOneMemOperand()) { - MachineMemOperand *MMO = *MI->memoperands_begin(); + if (OpNum == 0 && MI.hasOneMemOperand()) { + MachineMemOperand *MMO = *MI.memoperands_begin(); if (MMO->getSize() == Size && !MMO->isVolatile()) { // Handle conversion of loads. - if (isSimpleBD12Move(MI, SystemZII::SimpleBDXLoad)) { - return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(), + if (isSimpleBD12Move(&MI, SystemZII::SimpleBDXLoad)) { + return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(), get(SystemZ::MVC)) .addFrameIndex(FrameIndex) .addImm(0) .addImm(Size) - .addOperand(MI->getOperand(1)) - .addImm(MI->getOperand(2).getImm()) + .addOperand(MI.getOperand(1)) + .addImm(MI.getOperand(2).getImm()) .addMemOperand(MMO); } // Handle conversion of stores. - if (isSimpleBD12Move(MI, SystemZII::SimpleBDXStore)) { - return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(), + if (isSimpleBD12Move(&MI, SystemZII::SimpleBDXStore)) { + return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(), get(SystemZ::MVC)) - .addOperand(MI->getOperand(1)) - .addImm(MI->getOperand(2).getImm()) + .addOperand(MI.getOperand(1)) + .addImm(MI.getOperand(2).getImm()) .addImm(Size) .addFrameIndex(FrameIndex) .addImm(0) @@ -866,7 +1002,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( // into <INSN>. 
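  // For instance (a sketch; the mapping comes from getMemOpcode below):
  //   %r1 = AGR %r1, %r2     with %r2 living in a spill slot
  // can instead be emitted as the memory form
  //   %r1 = AG %r1, <fi#N + offset>
  // provided the spilled register is the final explicit operand.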
int MemOpcode = SystemZ::getMemOpcode(Opcode); if (MemOpcode >= 0) { - unsigned NumOps = MI->getNumExplicitOperands(); + unsigned NumOps = MI.getNumExplicitOperands(); if (OpNum == NumOps - 1) { const MCInstrDesc &MemDesc = get(MemOpcode); uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags); @@ -874,12 +1010,13 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( assert(AccessBytes <= Size && "Access outside the frame index"); uint64_t Offset = Size - AccessBytes; MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt, - MI->getDebugLoc(), get(MemOpcode)); + MI.getDebugLoc(), get(MemOpcode)); for (unsigned I = 0; I < OpNum; ++I) - MIB.addOperand(MI->getOperand(I)); + MIB.addOperand(MI.getOperand(I)); MIB.addFrameIndex(FrameIndex).addImm(Offset); if (MemDesc.TSFlags & SystemZII::HasIndex) MIB.addReg(0); + transferDeadCC(&MI, MIB); return MIB; } } @@ -888,14 +1025,14 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( } MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( - MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, - MachineBasicBlock::iterator InsertPt, MachineInstr *LoadMI) const { + MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI, + LiveIntervals *LIS) const { return nullptr; } -bool -SystemZInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { - switch (MI->getOpcode()) { +bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { + switch (MI.getOpcode()) { case SystemZ::L128: splitMove(MI, SystemZ::LG); return true; @@ -1033,13 +1170,13 @@ SystemZInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { return true; case SystemZ::RISBMux: { - bool DestIsHigh = isHighReg(MI->getOperand(0).getReg()); - bool SrcIsHigh = isHighReg(MI->getOperand(2).getReg()); + bool DestIsHigh = isHighReg(MI.getOperand(0).getReg()); + bool SrcIsHigh = isHighReg(MI.getOperand(2).getReg()); if (SrcIsHigh == DestIsHigh) - MI->setDesc(get(DestIsHigh ? SystemZ::RISBHH : SystemZ::RISBLL)); + MI.setDesc(get(DestIsHigh ? SystemZ::RISBHH : SystemZ::RISBLL)); else { - MI->setDesc(get(DestIsHigh ? SystemZ::RISBHL : SystemZ::RISBLH)); - MI->getOperand(5).setImm(MI->getOperand(5).getImm() ^ 32); + MI.setDesc(get(DestIsHigh ? 
SystemZ::RISBHL : SystemZ::RISBLH)); + MI.getOperand(5).setImm(MI.getOperand(5).getImm() ^ 32); } return true; } @@ -1048,62 +1185,65 @@ SystemZInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { splitAdjDynAlloc(MI); return true; + case TargetOpcode::LOAD_STACK_GUARD: + expandLoadStackGuard(&MI); + return true; + default: return false; } } -uint64_t SystemZInstrInfo::getInstSizeInBytes(const MachineInstr *MI) const { - if (MI->getOpcode() == TargetOpcode::INLINEASM) { - const MachineFunction *MF = MI->getParent()->getParent(); - const char *AsmStr = MI->getOperand(0).getSymbolName(); +uint64_t SystemZInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { + if (MI.getOpcode() == TargetOpcode::INLINEASM) { + const MachineFunction *MF = MI.getParent()->getParent(); + const char *AsmStr = MI.getOperand(0).getSymbolName(); return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo()); } - return MI->getDesc().getSize(); + return MI.getDesc().getSize(); } SystemZII::Branch -SystemZInstrInfo::getBranchInfo(const MachineInstr *MI) const { - switch (MI->getOpcode()) { +SystemZInstrInfo::getBranchInfo(const MachineInstr &MI) const { + switch (MI.getOpcode()) { case SystemZ::BR: case SystemZ::J: case SystemZ::JG: return SystemZII::Branch(SystemZII::BranchNormal, SystemZ::CCMASK_ANY, - SystemZ::CCMASK_ANY, &MI->getOperand(0)); + SystemZ::CCMASK_ANY, &MI.getOperand(0)); case SystemZ::BRC: case SystemZ::BRCL: - return SystemZII::Branch(SystemZII::BranchNormal, - MI->getOperand(0).getImm(), - MI->getOperand(1).getImm(), &MI->getOperand(2)); + return SystemZII::Branch(SystemZII::BranchNormal, MI.getOperand(0).getImm(), + MI.getOperand(1).getImm(), &MI.getOperand(2)); case SystemZ::BRCT: return SystemZII::Branch(SystemZII::BranchCT, SystemZ::CCMASK_ICMP, - SystemZ::CCMASK_CMP_NE, &MI->getOperand(2)); + SystemZ::CCMASK_CMP_NE, &MI.getOperand(2)); case SystemZ::BRCTG: return SystemZII::Branch(SystemZII::BranchCTG, SystemZ::CCMASK_ICMP, - SystemZ::CCMASK_CMP_NE, &MI->getOperand(2)); + SystemZ::CCMASK_CMP_NE, &MI.getOperand(2)); case SystemZ::CIJ: case SystemZ::CRJ: return SystemZII::Branch(SystemZII::BranchC, SystemZ::CCMASK_ICMP, - MI->getOperand(2).getImm(), &MI->getOperand(3)); + MI.getOperand(2).getImm(), &MI.getOperand(3)); case SystemZ::CLIJ: case SystemZ::CLRJ: return SystemZII::Branch(SystemZII::BranchCL, SystemZ::CCMASK_ICMP, - MI->getOperand(2).getImm(), &MI->getOperand(3)); + MI.getOperand(2).getImm(), &MI.getOperand(3)); case SystemZ::CGIJ: case SystemZ::CGRJ: return SystemZII::Branch(SystemZII::BranchCG, SystemZ::CCMASK_ICMP, - MI->getOperand(2).getImm(), &MI->getOperand(3)); + MI.getOperand(2).getImm(), &MI.getOperand(3)); case SystemZ::CLGIJ: case SystemZ::CLGRJ: return SystemZII::Branch(SystemZII::BranchCLG, SystemZ::CCMASK_ICMP, - MI->getOperand(2).getImm(), &MI->getOperand(3)); + MI.getOperand(2).getImm(), &MI.getOperand(3)); default: llvm_unreachable("Unrecognized branch opcode"); @@ -1250,28 +1390,107 @@ bool SystemZInstrInfo::isRxSBGMask(uint64_t Mask, unsigned BitSize, return false; } -unsigned SystemZInstrInfo::getCompareAndBranch(unsigned Opcode, - const MachineInstr *MI) const { +unsigned SystemZInstrInfo::getFusedCompare(unsigned Opcode, + SystemZII::FusedCompareType Type, + const MachineInstr *MI) const { switch (Opcode) { - case SystemZ::CR: - return SystemZ::CRJ; - case SystemZ::CGR: - return SystemZ::CGRJ; case SystemZ::CHI: - return MI && isInt<8>(MI->getOperand(1).getImm()) ? 
SystemZ::CIJ : 0; case SystemZ::CGHI: - return MI && isInt<8>(MI->getOperand(1).getImm()) ? SystemZ::CGIJ : 0; - case SystemZ::CLR: - return SystemZ::CLRJ; - case SystemZ::CLGR: - return SystemZ::CLGRJ; + if (!(MI && isInt<8>(MI->getOperand(1).getImm()))) + return 0; + break; case SystemZ::CLFI: - return MI && isUInt<8>(MI->getOperand(1).getImm()) ? SystemZ::CLIJ : 0; case SystemZ::CLGFI: - return MI && isUInt<8>(MI->getOperand(1).getImm()) ? SystemZ::CLGIJ : 0; - default: - return 0; + if (!(MI && isUInt<8>(MI->getOperand(1).getImm()))) + return 0; } + switch (Type) { + case SystemZII::CompareAndBranch: + switch (Opcode) { + case SystemZ::CR: + return SystemZ::CRJ; + case SystemZ::CGR: + return SystemZ::CGRJ; + case SystemZ::CHI: + return SystemZ::CIJ; + case SystemZ::CGHI: + return SystemZ::CGIJ; + case SystemZ::CLR: + return SystemZ::CLRJ; + case SystemZ::CLGR: + return SystemZ::CLGRJ; + case SystemZ::CLFI: + return SystemZ::CLIJ; + case SystemZ::CLGFI: + return SystemZ::CLGIJ; + default: + return 0; + } + case SystemZII::CompareAndReturn: + switch (Opcode) { + case SystemZ::CR: + return SystemZ::CRBReturn; + case SystemZ::CGR: + return SystemZ::CGRBReturn; + case SystemZ::CHI: + return SystemZ::CIBReturn; + case SystemZ::CGHI: + return SystemZ::CGIBReturn; + case SystemZ::CLR: + return SystemZ::CLRBReturn; + case SystemZ::CLGR: + return SystemZ::CLGRBReturn; + case SystemZ::CLFI: + return SystemZ::CLIBReturn; + case SystemZ::CLGFI: + return SystemZ::CLGIBReturn; + default: + return 0; + } + case SystemZII::CompareAndSibcall: + switch (Opcode) { + case SystemZ::CR: + return SystemZ::CRBCall; + case SystemZ::CGR: + return SystemZ::CGRBCall; + case SystemZ::CHI: + return SystemZ::CIBCall; + case SystemZ::CGHI: + return SystemZ::CGIBCall; + case SystemZ::CLR: + return SystemZ::CLRBCall; + case SystemZ::CLGR: + return SystemZ::CLGRBCall; + case SystemZ::CLFI: + return SystemZ::CLIBCall; + case SystemZ::CLGFI: + return SystemZ::CLGIBCall; + default: + return 0; + } + case SystemZII::CompareAndTrap: + switch (Opcode) { + case SystemZ::CR: + return SystemZ::CRT; + case SystemZ::CGR: + return SystemZ::CGRT; + case SystemZ::CHI: + return SystemZ::CIT; + case SystemZ::CGHI: + return SystemZ::CGIT; + case SystemZ::CLR: + return SystemZ::CLRT; + case SystemZ::CLGR: + return SystemZ::CLGRT; + case SystemZ::CLFI: + return SystemZ::CLFIT; + case SystemZ::CLGFI: + return SystemZ::CLGIT; + default: + return 0; + } + } + return 0; } void SystemZInstrInfo::loadImmediate(MachineBasicBlock &MBB, diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h index d9094ba93658..010010b89dc8 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h @@ -111,6 +111,22 @@ struct Branch { const MachineOperand *target) : Type(type), CCValid(ccValid), CCMask(ccMask), Target(target) {} }; +// Kinds of fused compares in compare-and-* instructions. Together with type +// of the converted compare, this identifies the compare-and-* +// instruction. +enum FusedCompareType { + // Relative branch - CRJ etc. + CompareAndBranch, + + // Indirect branch, used for return - CRBReturn etc. + CompareAndReturn, + + // Indirect branch, used for sibcall - CRBCall etc. 
+ CompareAndSibcall, + + // Trap + CompareAndTrap +}; } // end namespace SystemZII class SystemZSubtarget; @@ -120,16 +136,17 @@ class SystemZInstrInfo : public SystemZGenInstrInfo { void splitMove(MachineBasicBlock::iterator MI, unsigned NewOpcode) const; void splitAdjDynAlloc(MachineBasicBlock::iterator MI) const; - void expandRIPseudo(MachineInstr *MI, unsigned LowOpcode, - unsigned HighOpcode, bool ConvertHigh) const; - void expandRIEPseudo(MachineInstr *MI, unsigned LowOpcode, + void expandRIPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned HighOpcode, + bool ConvertHigh) const; + void expandRIEPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned LowOpcodeK, unsigned HighOpcode) const; - void expandRXYPseudo(MachineInstr *MI, unsigned LowOpcode, + void expandRXYPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned HighOpcode) const; - void expandZExtPseudo(MachineInstr *MI, unsigned LowOpcode, + void expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned Size) const; + void expandLoadStackGuard(MachineInstr *MI) const; void emitGRX32Move(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - DebugLoc DL, unsigned DestReg, unsigned SrcReg, + const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, unsigned LowLowOpcode, unsigned Size, bool KillSrc) const; virtual void anchor(); @@ -137,26 +154,26 @@ public: explicit SystemZInstrInfo(SystemZSubtarget &STI); // Override TargetInstrInfo. - unsigned isLoadFromStackSlot(const MachineInstr *MI, + unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override; - unsigned isStoreToStackSlot(const MachineInstr *MI, + unsigned isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override; - bool isStackSlotCopy(const MachineInstr *MI, int &DestFrameIndex, + bool isStackSlotCopy(const MachineInstr &MI, int &DestFrameIndex, int &SrcFrameIndex) const override; - bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const override; unsigned RemoveBranch(MachineBasicBlock &MBB) const override; unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond, - DebugLoc DL) const override; - bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, + const DebugLoc &DL) const override; + bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, unsigned &SrcReg2, int &Mask, int &Value) const override; - bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, + bool optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int Mask, int Value, const MachineRegisterInfo *MRI) const override; - bool isPredicable(MachineInstr *MI) const override; + bool isPredicable(MachineInstr &MI) const override; bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, BranchProbability Probability) const override; @@ -165,10 +182,12 @@ public: MachineBasicBlock &FMBB, unsigned NumCyclesF, unsigned ExtraPredCyclesF, BranchProbability Probability) const override; - bool PredicateInstruction(MachineInstr *MI, + bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, + BranchProbability Probability) const override; + bool PredicateInstruction(MachineInstr &MI, ArrayRef<MachineOperand> Pred) const override; void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - DebugLoc DL, unsigned 
DestReg, unsigned SrcReg, + const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const override; void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, @@ -181,17 +200,18 @@ public: const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override; MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI, - MachineBasicBlock::iterator &MBBI, + MachineInstr &MI, LiveVariables *LV) const override; - MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - ArrayRef<unsigned> Ops, - MachineBasicBlock::iterator InsertPt, - int FrameIndex) const override; - MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - ArrayRef<unsigned> Ops, - MachineBasicBlock::iterator InsertPt, - MachineInstr *LoadMI) const override; - bool expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const override; + MachineInstr * + foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, + ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, int FrameIndex, + LiveIntervals *LIS = nullptr) const override; + MachineInstr *foldMemoryOperandImpl( + MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI, + LiveIntervals *LIS = nullptr) const override; + bool expandPostRAPseudo(MachineInstr &MBBI) const override; bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override; @@ -199,14 +219,14 @@ public: const SystemZRegisterInfo &getRegisterInfo() const { return RI; } // Return the size in bytes of MI. - uint64_t getInstSizeInBytes(const MachineInstr *MI) const; + uint64_t getInstSizeInBytes(const MachineInstr &MI) const; // Return true if MI is a conditional or unconditional branch. // When returning true, set Cond to the mask of condition-code // values on which the instruction will branch, and set Target // to the operand that contains the branch target. This target // can be a register or a basic block. - SystemZII::Branch getBranchInfo(const MachineInstr *MI) const; + SystemZII::Branch getBranchInfo(const MachineInstr &MI) const; // Get the load and store opcodes for a given register class. void getLoadStoreOpcodes(const TargetRegisterClass *RC, @@ -229,11 +249,12 @@ public: bool isRxSBGMask(uint64_t Mask, unsigned BitSize, unsigned &Start, unsigned &End) const; - // If Opcode is a COMPARE opcode for which an associated COMPARE AND - // BRANCH exists, return the opcode for the latter, otherwise return 0. + // If Opcode is a COMPARE opcode for which an associated fused COMPARE AND * + // operation exists, return the opcode for the latter, otherwise return 0. // MI, if nonnull, is the compare instruction. - unsigned getCompareAndBranch(unsigned Opcode, - const MachineInstr *MI = nullptr) const; + unsigned getFusedCompare(unsigned Opcode, + SystemZII::FusedCompareType Type, + const MachineInstr *MI = nullptr) const; // Emit code before MBBI in MI to move immediate value Value into // physical register Reg. diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td index d5dabc2cd6ab..c510ca774be3 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -36,6 +36,22 @@ let hasSideEffects = 0 in { let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in def Return : Alias<2, (outs), (ins), [(z_retflag)]>; +// A conditional return instruction (bcr <cond>, %r14). 
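+// (Emitted as a BCR: with the "equal" mask, for example, this prints as
+// "ber %r14", returning only when CC indicates equal.)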
+let isReturn = 1, isTerminator = 1, hasCtrlDep = 1, CCMaskFirst = 1, Uses = [CC] in + def CondReturn : Alias<2, (outs), (ins cond4:$valid, cond4:$R1), []>; + +// Fused compare and conditional returns. +let isReturn = 1, isTerminator = 1, hasCtrlDep = 1 in { + def CRBReturn : Alias<6, (outs), (ins GR32:$R1, GR32:$R2, cond4:$M3), []>; + def CGRBReturn : Alias<6, (outs), (ins GR64:$R1, GR64:$R2, cond4:$M3), []>; + def CIBReturn : Alias<6, (outs), (ins GR32:$R1, imm32sx8:$I2, cond4:$M3), []>; + def CGIBReturn : Alias<6, (outs), (ins GR64:$R1, imm64sx8:$I2, cond4:$M3), []>; + def CLRBReturn : Alias<6, (outs), (ins GR32:$R1, GR32:$R2, cond4:$M3), []>; + def CLGRBReturn : Alias<6, (outs), (ins GR64:$R1, GR64:$R2, cond4:$M3), []>; + def CLIBReturn : Alias<6, (outs), (ins GR32:$R1, imm32zx8:$I2, cond4:$M3), []>; + def CLGIBReturn : Alias<6, (outs), (ins GR64:$R1, imm64zx8:$I2, cond4:$M3), []>; +} + // Unconditional branches. R1 is the condition-code mask (all 1s). let isBranch = 1, isTerminator = 1, isBarrier = 1, R1 = 15 in { let isIndirectBranch = 1 in @@ -51,6 +67,17 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, R1 = 15 in { def JG : InstRIL<0xC04, (outs), (ins brtarget32:$I2), "jg\t$I2", []>; } +// FIXME: This trap instruction should be marked as isTerminator, but there is +// currently a general bug that allows non-terminators to be placed between +// terminators. Temporarily leave this unmarked until the bug is fixed. +let isBarrier = 1, hasCtrlDep = 1 in { + def Trap : Alias<4, (outs), (ins), [(trap)]>; +} + +let isTerminator = 1, hasCtrlDep = 1, Uses = [CC] in { + def CondTrap : Alias<4, (outs), (ins cond4:$valid, cond4:$R1), []>; +} + // Conditional branches. It's easier for LLVM to handle these branches // in their raw BRC/BRCL form, with the 4-bit condition-code mask being // the first operand. It seems friendlier to use mnemonic forms like @@ -62,15 +89,25 @@ let isBranch = 1, isTerminator = 1, Uses = [CC] in { [(z_br_ccmask cond4:$valid, cond4:$R1, bb:$I2)]>; def BRCL : InstRIL<0xC04, (outs), (ins cond4:$valid, cond4:$R1, brtarget32:$I2), "jg$R1\t$I2", []>; + let isIndirectBranch = 1 in + def BCR : InstRR<0x07, (outs), (ins cond4:$valid, cond4:$R1, GR64:$R2), + "b${R1}r\t$R2", []>; } def AsmBRC : InstRI<0xA74, (outs), (ins imm32zx4:$R1, brtarget16:$I2), "brc\t$R1, $I2", []>; def AsmBRCL : InstRIL<0xC04, (outs), (ins imm32zx4:$R1, brtarget32:$I2), "brcl\t$R1, $I2", []>; - def AsmBCR : InstRR<0x07, (outs), (ins imm32zx4:$R1, GR64:$R2), - "bcr\t$R1, $R2", []>; + let isIndirectBranch = 1 in { + def AsmBC : InstRX<0x47, (outs), (ins imm32zx4:$R1, bdxaddr12only:$XBD2), + "bc\t$R1, $XBD2", []>; + def AsmBCR : InstRR<0x07, (outs), (ins imm32zx4:$R1, GR64:$R2), + "bcr\t$R1, $R2", []>; + } } +def AsmNop : InstAlias<"nop\t$XBD", (AsmBC 0, bdxaddr12only:$XBD), 0>; +def AsmNopR : InstAlias<"nopr\t$R", (AsmBCR 0, GR64:$R), 0>; + // Fused compare-and-branch instructions. As for normal branches, // we handle these instructions internally in their raw CRJ-like form, // but use assembly macros like CRJE when writing them out. 
@@ -83,38 +120,83 @@ multiclass CompareBranches<Operand ccmask, string pos1, string pos2> { let isBranch = 1, isTerminator = 1, Defs = [CC] in { def RJ : InstRIEb<0xEC76, (outs), (ins GR32:$R1, GR32:$R2, ccmask:$M3, brtarget16:$RI4), - "crj"##pos1##"\t$R1, $R2, "##pos2##"$RI4", []>; + "crj"##pos1##"\t$R1, $R2"##pos2##", $RI4", []>; def GRJ : InstRIEb<0xEC64, (outs), (ins GR64:$R1, GR64:$R2, ccmask:$M3, brtarget16:$RI4), - "cgrj"##pos1##"\t$R1, $R2, "##pos2##"$RI4", []>; + "cgrj"##pos1##"\t$R1, $R2"##pos2##", $RI4", []>; def IJ : InstRIEc<0xEC7E, (outs), (ins GR32:$R1, imm32sx8:$I2, ccmask:$M3, brtarget16:$RI4), - "cij"##pos1##"\t$R1, $I2, "##pos2##"$RI4", []>; + "cij"##pos1##"\t$R1, $I2"##pos2##", $RI4", []>; def GIJ : InstRIEc<0xEC7C, (outs), (ins GR64:$R1, imm64sx8:$I2, ccmask:$M3, brtarget16:$RI4), - "cgij"##pos1##"\t$R1, $I2, "##pos2##"$RI4", []>; + "cgij"##pos1##"\t$R1, $I2"##pos2##", $RI4", []>; def LRJ : InstRIEb<0xEC77, (outs), (ins GR32:$R1, GR32:$R2, ccmask:$M3, brtarget16:$RI4), - "clrj"##pos1##"\t$R1, $R2, "##pos2##"$RI4", []>; + "clrj"##pos1##"\t$R1, $R2"##pos2##", $RI4", []>; def LGRJ : InstRIEb<0xEC65, (outs), (ins GR64:$R1, GR64:$R2, ccmask:$M3, brtarget16:$RI4), - "clgrj"##pos1##"\t$R1, $R2, "##pos2##"$RI4", []>; + "clgrj"##pos1##"\t$R1, $R2"##pos2##", $RI4", []>; def LIJ : InstRIEc<0xEC7F, (outs), (ins GR32:$R1, imm32zx8:$I2, ccmask:$M3, brtarget16:$RI4), - "clij"##pos1##"\t$R1, $I2, "##pos2##"$RI4", []>; + "clij"##pos1##"\t$R1, $I2"##pos2##", $RI4", []>; def LGIJ : InstRIEc<0xEC7D, (outs), (ins GR64:$R1, imm64zx8:$I2, ccmask:$M3, brtarget16:$RI4), - "clgij"##pos1##"\t$R1, $I2, "##pos2##"$RI4", []>; + "clgij"##pos1##"\t$R1, $I2"##pos2##", $RI4", []>; + let isIndirectBranch = 1 in { + def RB : InstRRS<0xECF6, (outs), (ins GR32:$R1, GR32:$R2, ccmask:$M3, + bdaddr12only:$BD4), + "crb"##pos1##"\t$R1, $R2"##pos2##", $BD4", []>; + def GRB : InstRRS<0xECE4, (outs), (ins GR64:$R1, GR64:$R2, ccmask:$M3, + bdaddr12only:$BD4), + "cgrb"##pos1##"\t$R1, $R2"##pos2##", $BD4", []>; + def IB : InstRIS<0xECFE, (outs), (ins GR32:$R1, imm32sx8:$I2, ccmask:$M3, + bdaddr12only:$BD4), + "cib"##pos1##"\t$R1, $I2"##pos2##", $BD4", []>; + def GIB : InstRIS<0xECFC, (outs), (ins GR64:$R1, imm64sx8:$I2, ccmask:$M3, + bdaddr12only:$BD4), + "cgib"##pos1##"\t$R1, $I2"##pos2##", $BD4", []>; + def LRB : InstRRS<0xECF7, (outs), (ins GR32:$R1, GR32:$R2, ccmask:$M3, + bdaddr12only:$BD4), + "clrb"##pos1##"\t$R1, $R2"##pos2##", $BD4", []>; + def LGRB : InstRRS<0xECE5, (outs), (ins GR64:$R1, GR64:$R2, ccmask:$M3, + bdaddr12only:$BD4), + "clgrb"##pos1##"\t$R1, $R2"##pos2##", $BD4", []>; + def LIB : InstRIS<0xECFF, (outs), (ins GR32:$R1, imm32zx8:$I2, ccmask:$M3, + bdaddr12only:$BD4), + "clib"##pos1##"\t$R1, $I2"##pos2##", $BD4", []>; + def LGIB : InstRIS<0xECFD, (outs), (ins GR64:$R1, imm64zx8:$I2, ccmask:$M3, + bdaddr12only:$BD4), + "clgib"##pos1##"\t$R1, $I2"##pos2##", $BD4", []>; + } + } + + let isTerminator = 1, hasCtrlDep = 1 in { + def RT : InstRRFc<0xB972, (outs), (ins GR32:$R1, GR32:$R2, ccmask:$M3), + "crt"##pos1##"\t$R1, $R2"##pos2, []>; + def GRT : InstRRFc<0xB960, (outs), (ins GR64:$R1, GR64:$R2, ccmask:$M3), + "cgrt"##pos1##"\t$R1, $R2"##pos2, []>; + def LRT : InstRRFc<0xB973, (outs), (ins GR32:$R1, GR32:$R2, ccmask:$M3), + "clrt"##pos1##"\t$R1, $R2"##pos2, []>; + def LGRT : InstRRFc<0xB961, (outs), (ins GR64:$R1, GR64:$R2, ccmask:$M3), + "clgrt"##pos1##"\t$R1, $R2"##pos2, []>; + def IT : InstRIEa<0xEC72, (outs), (ins GR32:$R1, imm32sx16:$I2, ccmask:$M3), + "cit"##pos1##"\t$R1, $I2"##pos2, []>; + 
def GIT : InstRIEa<0xEC70, (outs), (ins GR64:$R1, imm32sx16:$I2, ccmask:$M3), + "cgit"##pos1##"\t$R1, $I2"##pos2, []>; + def LFIT : InstRIEa<0xEC73, (outs), (ins GR32:$R1, imm32zx16:$I2, ccmask:$M3), + "clfit"##pos1##"\t$R1, $I2"##pos2, []>; + def LGIT : InstRIEa<0xEC71, (outs), (ins GR64:$R1, imm32zx16:$I2, ccmask:$M3), + "clgit"##pos1##"\t$R1, $I2"##pos2, []>; } } let isCodeGenOnly = 1 in defm C : CompareBranches<cond4, "$M3", "">; -defm AsmC : CompareBranches<imm32zx4, "", "$M3, ">; +defm AsmC : CompareBranches<imm32zx4, "", ", $M3">; // Define AsmParser mnemonics for each general condition-code mask // (integer or floating-point) -multiclass CondExtendedMnemonic<bits<4> ccmask, string name> { - let R1 = ccmask in { +multiclass CondExtendedMnemonicA<bits<4> ccmask, string name> { + let isBranch = 1, isTerminator = 1, R1 = ccmask in { def J : InstRI<0xA74, (outs), (ins brtarget16:$I2), "j"##name##"\t$I2", []>; def JG : InstRIL<0xC04, (outs), (ins brtarget32:$I2), @@ -123,25 +205,36 @@ multiclass CondExtendedMnemonic<bits<4> ccmask, string name> { } def LOCR : FixedCondUnaryRRF<"locr"##name, 0xB9F2, GR32, GR32, ccmask>; def LOCGR : FixedCondUnaryRRF<"locgr"##name, 0xB9E2, GR64, GR64, ccmask>; + def LOCHI : FixedCondUnaryRIE<"lochi"##name, 0xEC42, GR64, imm32sx16, + ccmask>; + def LOCGHI: FixedCondUnaryRIE<"locghi"##name, 0xEC46, GR64, imm64sx16, + ccmask>; def LOC : FixedCondUnaryRSY<"loc"##name, 0xEBF2, GR32, ccmask, 4>; def LOCG : FixedCondUnaryRSY<"locg"##name, 0xEBE2, GR64, ccmask, 8>; def STOC : FixedCondStoreRSY<"stoc"##name, 0xEBF3, GR32, ccmask, 4>; def STOCG : FixedCondStoreRSY<"stocg"##name, 0xEBE3, GR64, ccmask, 8>; } -defm AsmO : CondExtendedMnemonic<1, "o">; -defm AsmH : CondExtendedMnemonic<2, "h">; -defm AsmNLE : CondExtendedMnemonic<3, "nle">; -defm AsmL : CondExtendedMnemonic<4, "l">; -defm AsmNHE : CondExtendedMnemonic<5, "nhe">; -defm AsmLH : CondExtendedMnemonic<6, "lh">; -defm AsmNE : CondExtendedMnemonic<7, "ne">; -defm AsmE : CondExtendedMnemonic<8, "e">; -defm AsmNLH : CondExtendedMnemonic<9, "nlh">; -defm AsmHE : CondExtendedMnemonic<10, "he">; -defm AsmNL : CondExtendedMnemonic<11, "nl">; -defm AsmLE : CondExtendedMnemonic<12, "le">; -defm AsmNH : CondExtendedMnemonic<13, "nh">; -defm AsmNO : CondExtendedMnemonic<14, "no">; + +multiclass CondExtendedMnemonic<bits<4> ccmask, string name1, string name2> + : CondExtendedMnemonicA<ccmask, name1> { + let isAsmParserOnly = 1 in + defm Alt : CondExtendedMnemonicA<ccmask, name2>; +} + +defm AsmO : CondExtendedMnemonicA<1, "o">; +defm AsmH : CondExtendedMnemonic<2, "h", "p">; +defm AsmNLE : CondExtendedMnemonicA<3, "nle">; +defm AsmL : CondExtendedMnemonic<4, "l", "m">; +defm AsmNHE : CondExtendedMnemonicA<5, "nhe">; +defm AsmLH : CondExtendedMnemonicA<6, "lh">; +defm AsmNE : CondExtendedMnemonic<7, "ne", "nz">; +defm AsmE : CondExtendedMnemonic<8, "e", "z">; +defm AsmNLH : CondExtendedMnemonicA<9, "nlh">; +defm AsmHE : CondExtendedMnemonicA<10, "he">; +defm AsmNL : CondExtendedMnemonic<11, "nl", "nm">; +defm AsmLE : CondExtendedMnemonicA<12, "le">; +defm AsmNH : CondExtendedMnemonic<13, "nh", "np">; +defm AsmNO : CondExtendedMnemonicA<14, "no">; // Define AsmParser mnemonics for each integer condition-code mask. // This is like the list above, except that condition 3 is not possible @@ -151,31 +244,76 @@ defm AsmNO : CondExtendedMnemonic<14, "no">; // We don't make one of the two names an alias of the other because // we need the custom parsing routines to select the correct register class. 
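// For example, "crje %r1, %r2, label" parses as CRJ with M3 hard-wired
// to 8 (the "equal" mask), while "crjne" hard-wires mask 7.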
multiclass IntCondExtendedMnemonicA<bits<4> ccmask, string name> { - let M3 = ccmask in { - def CR : InstRIEb<0xEC76, (outs), (ins GR32:$R1, GR32:$R2, - brtarget16:$RI4), - "crj"##name##"\t$R1, $R2, $RI4", []>; - def CGR : InstRIEb<0xEC64, (outs), (ins GR64:$R1, GR64:$R2, - brtarget16:$RI4), - "cgrj"##name##"\t$R1, $R2, $RI4", []>; - def CI : InstRIEc<0xEC7E, (outs), (ins GR32:$R1, imm32sx8:$I2, - brtarget16:$RI4), - "cij"##name##"\t$R1, $I2, $RI4", []>; - def CGI : InstRIEc<0xEC7C, (outs), (ins GR64:$R1, imm64sx8:$I2, - brtarget16:$RI4), - "cgij"##name##"\t$R1, $I2, $RI4", []>; - def CLR : InstRIEb<0xEC77, (outs), (ins GR32:$R1, GR32:$R2, - brtarget16:$RI4), - "clrj"##name##"\t$R1, $R2, $RI4", []>; - def CLGR : InstRIEb<0xEC65, (outs), (ins GR64:$R1, GR64:$R2, + let isBranch = 1, isTerminator = 1, M3 = ccmask in { + def CRJ : InstRIEb<0xEC76, (outs), (ins GR32:$R1, GR32:$R2, + brtarget16:$RI4), + "crj"##name##"\t$R1, $R2, $RI4", []>; + def CGRJ : InstRIEb<0xEC64, (outs), (ins GR64:$R1, GR64:$R2, + brtarget16:$RI4), + "cgrj"##name##"\t$R1, $R2, $RI4", []>; + def CIJ : InstRIEc<0xEC7E, (outs), (ins GR32:$R1, imm32sx8:$I2, brtarget16:$RI4), - "clgrj"##name##"\t$R1, $R2, $RI4", []>; - def CLI : InstRIEc<0xEC7F, (outs), (ins GR32:$R1, imm32zx8:$I2, + "cij"##name##"\t$R1, $I2, $RI4", []>; + def CGIJ : InstRIEc<0xEC7C, (outs), (ins GR64:$R1, imm64sx8:$I2, brtarget16:$RI4), - "clij"##name##"\t$R1, $I2, $RI4", []>; - def CLGI : InstRIEc<0xEC7D, (outs), (ins GR64:$R1, imm64zx8:$I2, + "cgij"##name##"\t$R1, $I2, $RI4", []>; + def CLRJ : InstRIEb<0xEC77, (outs), (ins GR32:$R1, GR32:$R2, brtarget16:$RI4), - "clgij"##name##"\t$R1, $I2, $RI4", []>; + "clrj"##name##"\t$R1, $R2, $RI4", []>; + def CLGRJ : InstRIEb<0xEC65, (outs), (ins GR64:$R1, GR64:$R2, + brtarget16:$RI4), + "clgrj"##name##"\t$R1, $R2, $RI4", []>; + def CLIJ : InstRIEc<0xEC7F, (outs), (ins GR32:$R1, imm32zx8:$I2, + brtarget16:$RI4), + "clij"##name##"\t$R1, $I2, $RI4", []>; + def CLGIJ : InstRIEc<0xEC7D, (outs), (ins GR64:$R1, imm64zx8:$I2, + brtarget16:$RI4), + "clgij"##name##"\t$R1, $I2, $RI4", []>; + let isIndirectBranch = 1 in { + def CRB : InstRRS<0xECF6, (outs), (ins GR32:$R1, GR32:$R2, + bdaddr12only:$BD4), + "crb"##name##"\t$R1, $R2, $BD4", []>; + def CGRB : InstRRS<0xECE4, (outs), (ins GR64:$R1, GR64:$R2, + bdaddr12only:$BD4), + "cgrb"##name##"\t$R1, $R2, $BD4", []>; + def CIB : InstRIS<0xECFE, (outs), (ins GR32:$R1, imm32sx8:$I2, + bdaddr12only:$BD4), + "cib"##name##"\t$R1, $I2, $BD4", []>; + def CGIB : InstRIS<0xECFC, (outs), (ins GR64:$R1, imm64sx8:$I2, + bdaddr12only:$BD4), + "cgib"##name##"\t$R1, $I2, $BD4", []>; + def CLRB : InstRRS<0xECF7, (outs), (ins GR32:$R1, GR32:$R2, + bdaddr12only:$BD4), + "clrb"##name##"\t$R1, $R2, $BD4", []>; + def CLGRB : InstRRS<0xECE5, (outs), (ins GR64:$R1, GR64:$R2, + bdaddr12only:$BD4), + "clgrb"##name##"\t$R1, $R2, $BD4", []>; + def CLIB : InstRIS<0xECFF, (outs), (ins GR32:$R1, imm32zx8:$I2, + bdaddr12only:$BD4), + "clib"##name##"\t$R1, $I2, $BD4", []>; + def CLGIB : InstRIS<0xECFD, (outs), (ins GR64:$R1, imm64zx8:$I2, + bdaddr12only:$BD4), + "clgib"##name##"\t$R1, $I2, $BD4", []>; + } + } + + let hasCtrlDep = 1, isTerminator = 1, M3 = ccmask in { + def CRT : InstRRFc<0xB972, (outs), (ins GR32:$R1, GR32:$R2), + "crt"##name##"\t$R1, $R2", []>; + def CGRT : InstRRFc<0xB960, (outs), (ins GR64:$R1, GR64:$R2), + "cgrt"##name##"\t$R1, $R2", []>; + def CLRT : InstRRFc<0xB973, (outs), (ins GR32:$R1, GR32:$R2), + "clrt"##name##"\t$R1, $R2", []>; + def CLGRT : InstRRFc<0xB961, (outs), (ins GR64:$R1, 
GR64:$R2), + "clgrt"##name##"\t$R1, $R2", []>; + def CIT : InstRIEa<0xEC72, (outs), (ins GR32:$R1, imm32sx16:$I2), + "cit"##name##"\t$R1, $I2", []>; + def CGIT : InstRIEa<0xEC70, (outs), (ins GR64:$R1, imm32sx16:$I2), + "cgit"##name##"\t$R1, $I2", []>; + def CLFIT : InstRIEa<0xEC73, (outs), (ins GR32:$R1, imm32zx16:$I2), + "clfit"##name##"\t$R1, $I2", []>; + def CLGIT : InstRIEa<0xEC71, (outs), (ins GR64:$R1, imm32zx16:$I2), + "clgit"##name##"\t$R1, $I2", []>; } } multiclass IntCondExtendedMnemonic<bits<4> ccmask, string name1, string name2> @@ -249,6 +387,26 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { def CallBR : Alias<2, (outs), (ins), [(z_sibcall R1D)]>; } +let CCMaskFirst = 1, isCall = 1, isTerminator = 1, isReturn = 1 in { + def CallBRCL : Alias<6, (outs), (ins cond4:$valid, cond4:$R1, + pcrel32:$I2), []>; + + let Uses = [R1D] in + def CallBCR : Alias<2, (outs), (ins cond4:$valid, cond4:$R1), []>; +} + +// Fused compare and conditional sibling calls. +let isCall = 1, isTerminator = 1, isReturn = 1, Uses = [R1D] in { + def CRBCall : Alias<6, (outs), (ins GR32:$R1, GR32:$R2, cond4:$M3), []>; + def CGRBCall : Alias<6, (outs), (ins GR64:$R1, GR64:$R2, cond4:$M3), []>; + def CIBCall : Alias<6, (outs), (ins GR32:$R1, imm32sx8:$I2, cond4:$M3), []>; + def CGIBCall : Alias<6, (outs), (ins GR64:$R1, imm64sx8:$I2, cond4:$M3), []>; + def CLRBCall : Alias<6, (outs), (ins GR32:$R1, GR32:$R2, cond4:$M3), []>; + def CLGRBCall : Alias<6, (outs), (ins GR64:$R1, GR64:$R2, cond4:$M3), []>; + def CLIBCall : Alias<6, (outs), (ins GR32:$R1, imm32zx8:$I2, cond4:$M3), []>; + def CLGIBCall : Alias<6, (outs), (ins GR64:$R1, imm64zx8:$I2, cond4:$M3), []>; +} + // TLS calls. These will be lowered into a call to __tls_get_offset, // with an extra relocation specifying the TLS symbol. let isCall = 1, Defs = [R14D, CC] in { @@ -261,12 +419,14 @@ let isCall = 1, Defs = [R14D, CC] in { // Define the general form of the call instructions for the asm parser. // These instructions don't hard-code %r14 as the return address register. // Allow an optional TLS marker symbol to generate TLS call relocations. -def BRAS : InstRI<0xA75, (outs), (ins GR64:$R1, brtarget16tls:$I2), - "bras\t$R1, $I2", []>; -def BRASL : InstRIL<0xC05, (outs), (ins GR64:$R1, brtarget32tls:$I2), - "brasl\t$R1, $I2", []>; -def BASR : InstRR<0x0D, (outs), (ins GR64:$R1, ADDR64:$R2), - "basr\t$R1, $R2", []>; +let isCall = 1, Defs = [CC] in { + def BRAS : InstRI<0xA75, (outs), (ins GR64:$R1, brtarget16tls:$I2), + "bras\t$R1, $I2", []>; + def BRASL : InstRIL<0xC05, (outs), (ins GR64:$R1, brtarget32tls:$I2), + "brasl\t$R1, $I2", []>; + def BASR : InstRR<0x0D, (outs), (ins GR64:$R1, ADDR64:$R2), + "basr\t$R1, $R2", []>; +} //===----------------------------------------------------------------------===// // Move instructions @@ -294,6 +454,14 @@ let Uses = [CC] in { def AsmLOCR : AsmCondUnaryRRF<"loc", 0xB9F2, GR32, GR32>; def AsmLOCGR : AsmCondUnaryRRF<"locg", 0xB9E2, GR64, GR64>; } +let isCodeGenOnly = 1, Uses = [CC] in { + def LOCHI : CondUnaryRIE<"lochi", 0xEC42, GR32, imm32sx16>; + def LOCGHI : CondUnaryRIE<"locghi", 0xEC46, GR64, imm64sx16>; +} +let Uses = [CC] in { + def AsmLOCHI : AsmCondUnaryRIE<"lochi", 0xEC42, GR32, imm32sx16>; + def AsmLOCGHI : AsmCondUnaryRIE<"locghi", 0xEC46, GR64, imm64sx16>; +} // Immediate moves. 
let hasSideEffects = 0, isAsCheapAsAMove = 1, isMoveImm = 1, @@ -546,10 +714,14 @@ def : StoreGR64PC<STRL, aligned_truncstorei32>; //===----------------------------------------------------------------------===// // Multi-register loads. +defm LM : LoadMultipleRSPair<"lm", 0x98, 0xEB98, GR32>; def LMG : LoadMultipleRSY<"lmg", 0xEB04, GR64>; +def LMH : LoadMultipleRSY<"lmh", 0xEB96, GRH32>; // Multi-register stores. +defm STM : StoreMultipleRSPair<"stm", 0x90, 0xEB90, GR32>; def STMG : StoreMultipleRSY<"stmg", 0xEB24, GR64>; +def STMH : StoreMultipleRSY<"stmh", 0xEB26, GRH32>; //===----------------------------------------------------------------------===// // Byte swaps @@ -563,13 +735,14 @@ let hasSideEffects = 0 in { // Byte-swapping loads. Unlike normal loads, these instructions are // allowed to access storage more than once. -def LRV : UnaryRXY<"lrv", 0xE31E, loadu<bswap, nonvolatile_load>, GR32, 4>; -def LRVG : UnaryRXY<"lrvg", 0xE30F, loadu<bswap, nonvolatile_load>, GR64, 8>; +def LRVH : UnaryRXY<"lrvh", 0xE31F, z_lrvh, GR32, 2>; +def LRV : UnaryRXY<"lrv", 0xE31E, z_lrv, GR32, 4>; +def LRVG : UnaryRXY<"lrvg", 0xE30F, z_lrvg, GR64, 8>; // Likewise byte-swapping stores. -def STRV : StoreRXY<"strv", 0xE33E, storeu<bswap, nonvolatile_store>, GR32, 4>; -def STRVG : StoreRXY<"strvg", 0xE32F, storeu<bswap, nonvolatile_store>, - GR64, 8>; +def STRVH : StoreRXY<"strvh", 0xE33F, z_strvh, GR32, 2>; +def STRV : StoreRXY<"strv", 0xE33E, z_strv, GR32, 4>; +def STRVG : StoreRXY<"strvg", 0xE32F, z_strvg, GR64, 8>; //===----------------------------------------------------------------------===// // Load address instructions @@ -657,6 +830,11 @@ defm : InsertMem<"inserti8", IC32Y, GR32, azextloadi8, bdxaddr20pair>; defm : InsertMem<"inserti8", IC, GR64, azextloadi8, bdxaddr12pair>; defm : InsertMem<"inserti8", ICY, GR64, azextloadi8, bdxaddr20pair>; +let Defs = [CC] in { + defm ICM : TernaryRSPair<"icm", 0xBF, 0xEB81, GR32, 0>; + def ICMH : TernaryRSY<"icmh", 0xEB80, GRH32, 0>; +} + // Insertions of a 16-bit immediate, leaving other bits unaffected. // We don't have or_as_insert equivalents of these operations because // OI is available instead. @@ -812,7 +990,7 @@ defm : ZXB<subc, GR64, SLGFR>; let Defs = [CC], Uses = [CC] in { // Subtraction of a register. def SLBR : BinaryRRE<"slb", 0xB999, sube, GR32, GR32>; - def SLGBR : BinaryRRE<"slbg", 0xB989, sube, GR64, GR64>; + def SLBGR : BinaryRRE<"slbg", 0xB989, sube, GR64, GR64>; // Subtraction of memory. def SLB : BinaryRXY<"slb", 0xE399, sube, GR32, load, 4>; @@ -865,7 +1043,7 @@ let Defs = [CC] in { // ANDs of memory. let CCValues = 0xC, CompareZeroCCMask = 0x8 in { defm N : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load, 4>; - def NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>; + def NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>; } // AND to memory @@ -1030,6 +1208,7 @@ def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load, 8>; // Shift left. 
let hasSideEffects = 0 in { defm SLL : BinaryRSAndK<"sll", 0x89, 0xEBDF, shl, GR32>; + defm SLA : BinaryRSAndK<"sla", 0x8B, 0xEBDD, null_frag, GR32>; def SLLG : BinaryRSY<"sllg", 0xEB0D, shl, GR64>; } @@ -1208,6 +1387,9 @@ let Defs = [CC] in { defm TM : CompareSIPair<"tm", 0x91, 0xEB51, z_tm_mem, anyextloadi8, imm32zx8>; } +def TML : InstAlias<"tml\t$R, $I", (TMLL GR32:$R, imm32ll16:$I), 0>; +def TMH : InstAlias<"tmh\t$R, $I", (TMLH GR32:$R, imm32lh16:$I), 0>; + //===----------------------------------------------------------------------===// // Prefetch //===----------------------------------------------------------------------===// @@ -1224,6 +1406,10 @@ def PFDRL : PrefetchRILPC<"pfdrl", 0xC62, z_prefetch>; let hasSideEffects = 1 in def Serialize : Alias<2, (outs), (ins), [(z_serialize)]>; +// A pseudo instruction that serves as a compiler barrier. +let hasSideEffects = 1 in +def MemBarrier : Pseudo<(outs), (ins), [(z_membarrier)]>; + let Predicates = [FeatureInterlockedAccess1], Defs = [CC] in { def LAA : LoadAndOpRSY<"laa", 0xEBF8, atomic_load_add_32, GR32>; def LAAG : LoadAndOpRSY<"laag", 0xEBE8, atomic_load_add_64, GR64>; @@ -1466,6 +1652,10 @@ let mayLoad = 1, Defs = [CC] in defm SRST : StringRRE<"srst", 0xb25e, z_search_string>; // Other instructions for inline assembly +let hasSideEffects = 1, Defs = [CC], isCall = 1 in + def SVC : InstI<0x0A, (outs), (ins imm32zx8:$I1), + "svc\t$I1", + []>; let hasSideEffects = 1, Defs = [CC], mayStore = 1 in def STCK : InstS<0xB205, (outs), (ins bdaddr12only:$BD2), "stck\t$BD2", @@ -1483,6 +1673,12 @@ let hasSideEffects = 1, Defs = [CC], mayStore = 1 in "stfle\t$BD2", []>; +let hasSideEffects = 1 in { + def EX : InstRX<0x44, (outs), (ins GR64:$R1, bdxaddr12only:$XBD2), + "ex\t$R1, $XBD2", []>; + def EXRL : InstRIL<0xC60, (outs), (ins GR64:$R1, pcrel32:$I2), + "exrl\t$R1, $I2", []>; +} //===----------------------------------------------------------------------===// @@ -1515,6 +1711,42 @@ def : Pat<(sra (shl (i64 (anyext (i32 (z_select_ccmask 1, 0, imm32zx4:$valid, (i32 63)), (Select64 (LGHI -1), (LGHI 0), imm32zx4:$valid, imm32zx4:$cc)>; +// Avoid generating 2 XOR instructions. (xor (and x, y), y) is +// equivalent to (and (xor x, -1), y) +def : Pat<(and (xor GR64:$x, (i64 -1)), GR64:$y), + (XGR GR64:$y, (NGR GR64:$y, GR64:$x))>; + +// Shift/rotate instructions only use the last 6 bits of the second operand +// register, so we can safely use NILL (16 fewer bits than NILF) to only AND the +// last 16 bits. +// Complexity is added so that we match this before we match NILF on the AND +// operation alone. 
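+// For example, (shl GR64:$val, (and GR32:$shift, 63)) now selects
+// "NILL $shift, 63" feeding "SLLG $val, $shift, 0" instead of masking
+// with the longer NILF.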
+let AddedComplexity = 4 in { + def : Pat<(shl GR32:$val, (and GR32:$shift, uimm32:$imm)), + (SLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>; + + def : Pat<(sra GR32:$val, (and GR32:$shift, uimm32:$imm)), + (SRA GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>; + + def : Pat<(srl GR32:$val, (and GR32:$shift, uimm32:$imm)), + (SRL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>; + + def : Pat<(shl GR64:$val, (and GR32:$shift, uimm32:$imm)), + (SLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>; + + def : Pat<(sra GR64:$val, (and GR32:$shift, uimm32:$imm)), + (SRAG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>; + + def : Pat<(srl GR64:$val, (and GR32:$shift, uimm32:$imm)), + (SRLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>; + + def : Pat<(rotl GR32:$val, (and GR32:$shift, uimm32:$imm)), + (RLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>; + + def : Pat<(rotl GR64:$val, (and GR32:$shift, uimm32:$imm)), + (RLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>; +} + // Peepholes for turning scalar operations into block operations. defm : BlockLoadStore<anyextloadi8, i32, MVCSequence, NCSequence, OCSequence, XCSequence, 1>; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp index 24165be29ae7..2cdf2f9bf990 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp @@ -64,6 +64,9 @@ void SystemZLDCleanup::getAnalysisUsage(AnalysisUsage &AU) const { } bool SystemZLDCleanup::runOnMachineFunction(MachineFunction &F) { + if (skipFunction(*F.getFunction())) + return false; + TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo()); MF = &F; @@ -92,9 +95,9 @@ bool SystemZLDCleanup::VisitNode(MachineDomTreeNode *Node, switch (I->getOpcode()) { case SystemZ::TLS_LDCALL: if (TLSBaseAddrReg) - I = ReplaceTLSCall(I, TLSBaseAddrReg); + I = ReplaceTLSCall(&*I, TLSBaseAddrReg); else - I = SetRegister(I, &TLSBaseAddrReg); + I = SetRegister(&*I, &TLSBaseAddrReg); Changed = true; break; default: diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp index 8dab44e7f8af..a24d47d2d16b 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp @@ -58,7 +58,6 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/IR/Function.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -139,12 +138,16 @@ public: } bool runOnMachineFunction(MachineFunction &F) override; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::AllVRegsAllocated); + } private: void skipNonTerminators(BlockPosition &Position, MBBInfo &Block); void skipTerminator(BlockPosition &Position, TerminatorInfo &Terminator, bool AssumeRelaxed); - TerminatorInfo describeTerminator(MachineInstr *MI); + TerminatorInfo describeTerminator(MachineInstr &MI); uint64_t initMBBInfo(); bool mustRelaxBranch(const TerminatorInfo &Terminator, uint64_t Address); bool mustRelaxABranch(); @@ -207,11 +210,11 @@ void SystemZLongBranch::skipTerminator(BlockPosition &Position, } // Return a description of terminator instruction MI. 
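// (For example, a short J records ExtraRelaxSize = 2, the extra bytes
// incurred if it must later be widened to JG.)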
-TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr *MI) { +TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr &MI) { TerminatorInfo Terminator; Terminator.Size = TII->getInstSizeInBytes(MI); - if (MI->isConditionalBranch() || MI->isUnconditionalBranch()) { - switch (MI->getOpcode()) { + if (MI.isConditionalBranch() || MI.isUnconditionalBranch()) { + switch (MI.getOpcode()) { case SystemZ::J: // Relaxes to JG, which is 2 bytes longer. Terminator.ExtraRelaxSize = 2; @@ -248,7 +251,7 @@ TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr *MI) { default: llvm_unreachable("Unrecognized branch instruction"); } - Terminator.Branch = MI; + Terminator.Branch = &MI; Terminator.TargetBlock = TII->getBranchInfo(MI).Target->getMBB()->getNumber(); } @@ -280,7 +283,7 @@ uint64_t SystemZLongBranch::initMBBInfo() { MachineBasicBlock::iterator MI = MBB->begin(); MachineBasicBlock::iterator End = MBB->end(); while (MI != End && !MI->isTerminator()) { - Block.Size += TII->getInstSizeInBytes(MI); + Block.Size += TII->getInstSizeInBytes(*MI); ++MI; } skipNonTerminators(Position, Block); @@ -289,7 +292,7 @@ uint64_t SystemZLongBranch::initMBBInfo() { while (MI != End) { if (!MI->isDebugValue()) { assert(MI->isTerminator() && "Terminator followed by non-terminator"); - Terminators.push_back(describeTerminator(MI)); + Terminators.push_back(describeTerminator(*MI)); skipTerminator(Position, Terminators.back(), false); ++Block.NumTerminators; } diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h index f4a517bd54df..4f64f4c65f1d 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h @@ -22,14 +22,15 @@ class SystemZMachineFunctionInfo : public MachineFunctionInfo { unsigned VarArgsFirstFPR; unsigned VarArgsFrameIndex; unsigned RegSaveFrameIndex; + int FramePointerSaveIndex; bool ManipulatesSP; unsigned NumLocalDynamics; public: explicit SystemZMachineFunctionInfo(MachineFunction &MF) : LowSavedGPR(0), HighSavedGPR(0), VarArgsFirstGPR(0), VarArgsFirstFPR(0), - VarArgsFrameIndex(0), RegSaveFrameIndex(0), ManipulatesSP(false), - NumLocalDynamics(0) {} + VarArgsFrameIndex(0), RegSaveFrameIndex(0), FramePointerSaveIndex(0), + ManipulatesSP(false), NumLocalDynamics(0) {} // Get and set the first call-saved GPR that should be saved and restored // by this function. This is 0 if no GPRs need to be saved or restored. @@ -59,6 +60,10 @@ public: unsigned getRegSaveFrameIndex() const { return RegSaveFrameIndex; } void setRegSaveFrameIndex(unsigned FI) { RegSaveFrameIndex = FI; } + // Get and set the frame index of where the old frame pointer is stored. + int getFramePointerSaveIndex() const { return FramePointerSaveIndex; } + void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; } + // Get and set whether the function directly manipulates the stack pointer, // e.g. through STACKSAVE or STACKRESTORE. bool getManipulatesSP() const { return ManipulatesSP; } diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td b/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td index 9af90d492cf8..17b076d88a34 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td @@ -451,11 +451,11 @@ def PCRelTLS32 : PCRelTLSAsmOperand<"32">; // and multiplied by 2. 
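// (A signed 16-bit field therefore reaches roughly +/-64KiB from the
// instruction, and a 32-bit field roughly +/-4GiB.)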
def brtarget16 : PCRelOperand<OtherVT, PCRel16> { let EncoderMethod = "getPC16DBLEncoding"; - let DecoderMethod = "decodePC16DBLOperand"; + let DecoderMethod = "decodePC16DBLBranchOperand"; } def brtarget32 : PCRelOperand<OtherVT, PCRel32> { let EncoderMethod = "getPC32DBLEncoding"; - let DecoderMethod = "decodePC32DBLOperand"; + let DecoderMethod = "decodePC32DBLBranchOperand"; } // Variants of brtarget16/32 with an optional additional TLS symbol. @@ -464,12 +464,12 @@ def tlssym : Operand<i64> { } def brtarget16tls : PCRelTLSOperand<OtherVT, PCRelTLS16> { let MIOperandInfo = (ops brtarget16:$func, tlssym:$sym); let EncoderMethod = "getPC16DBLTLSEncoding"; - let DecoderMethod = "decodePC16DBLOperand"; + let DecoderMethod = "decodePC16DBLBranchOperand"; } def brtarget32tls : PCRelTLSOperand<OtherVT, PCRelTLS32> { let MIOperandInfo = (ops brtarget32:$func, tlssym:$sym); let EncoderMethod = "getPC32DBLTLSEncoding"; - let DecoderMethod = "decodePC32DBLOperand"; + let DecoderMethod = "decodePC32DBLBranchOperand"; } // A PC-relative offset of a global value. The offset is sign-extended diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td b/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td index 3c95a1e11b45..8d031f1ea05d 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td @@ -79,6 +79,14 @@ def SDT_ZI32Intrinsic : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>; def SDT_ZPrefetch : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>; +def SDT_ZLoadBSwap : SDTypeProfile<1, 2, + [SDTCisInt<0>, + SDTCisPtrTy<1>, + SDTCisVT<2, OtherVT>]>; +def SDT_ZStoreBSwap : SDTypeProfile<0, 3, + [SDTCisInt<0>, + SDTCisPtrTy<1>, + SDTCisVT<2, OtherVT>]>; def SDT_ZTBegin : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>; @@ -137,6 +145,7 @@ def SDT_ZVecQuaternaryInt : SDTypeProfile<1, 4, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisVT<4, i32>]>; +def SDT_ZTest : SDTypeProfile<0, 2, [SDTCisVT<1, i64>]>; //===----------------------------------------------------------------------===// // Node definitions @@ -188,6 +197,15 @@ def z_udivrem64 : SDNode<"SystemZISD::UDIVREM64", SDT_ZGR128Binary64>; def z_serialize : SDNode<"SystemZISD::SERIALIZE", SDTNone, [SDNPHasChain, SDNPMayStore]>; +def z_membarrier : SDNode<"SystemZISD::MEMBARRIER", SDTNone, + [SDNPHasChain, SDNPSideEffect]>; + +def z_loadbswap : SDNode<"SystemZISD::LRV", SDT_ZLoadBSwap, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def z_storebswap : SDNode<"SystemZISD::STRV", SDT_ZStoreBSwap, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + +def z_tdc : SDNode<"SystemZISD::TDC", SDT_ZTest, [SDNPOutGlue]>; // Defined because the index is an i32 rather than a pointer. def z_vector_insert : SDNode<"ISD::INSERT_VECTOR_ELT", @@ -329,6 +347,17 @@ def z_vsrl : SDNode<"ISD::SRL", SDT_ZVecBinary>; // Pattern fragments //===----------------------------------------------------------------------===// +def z_lrvh : PatFrag<(ops node:$addr), (z_loadbswap node:$addr, i16)>; +def z_lrv : PatFrag<(ops node:$addr), (z_loadbswap node:$addr, i32)>; +def z_lrvg : PatFrag<(ops node:$addr), (z_loadbswap node:$addr, i64)>; + +def z_strvh : PatFrag<(ops node:$src, node:$addr), + (z_storebswap node:$src, node:$addr, i16)>; +def z_strv : PatFrag<(ops node:$src, node:$addr), + (z_storebswap node:$src, node:$addr, i32)>; +def z_strvg : PatFrag<(ops node:$src, node:$addr), + (z_storebswap node:$src, node:$addr, i64)>; + // Signed and unsigned comparisons. 
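The z_lrv*/z_strv* fragments above give instruction selection a handle on byte-reversing memory accesses, so that (together with the lowering that forms the SystemZISD::LRV/STRV nodes, which is not part of this hunk) a bswap folded into a load or store can select to a single LOAD REVERSED / STORE REVERSED instruction. A source-level sketch of the shape this targets:

    #include <cstdint>

    uint32_t loadBigEndian32(const uint32_t *P) {
      // load i32 + llvm.bswap.i32 -- the candidate for a single LRV.
      return __builtin_bswap32(*P);
    }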
def z_scmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{ unsigned Type = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZProcessors.td b/contrib/llvm/lib/Target/SystemZ/SystemZProcessors.td index 32fbe5ae9ef9..9adc0189e650 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZProcessors.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZProcessors.td @@ -29,6 +29,11 @@ def FeatureLoadStoreOnCond : SystemZFeature< "Assume that the load/store-on-condition facility is installed" >; +def FeatureLoadStoreOnCond2 : SystemZFeature< + "load-store-on-cond-2", "LoadStoreOnCond2", + "Assume that the load/store-on-condition facility 2 is installed" +>; + def FeatureHighWord : SystemZFeature< "high-word", "HighWord", "Assume that the high-word facility is installed" @@ -92,5 +97,6 @@ def : Processor<"z13", NoItineraries, [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord, FeatureFPExtension, FeaturePopulationCount, FeatureFastSerialization, FeatureInterlockedAccess1, + FeatureMiscellaneousExtensions, FeatureTransactionalExecution, FeatureProcessorAssist, - FeatureVector]>; + FeatureVector, FeatureLoadStoreOnCond2]>; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp index 6fd24e3df625..b5e5fd4bfc4f 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -24,12 +24,20 @@ SystemZRegisterInfo::SystemZRegisterInfo() const MCPhysReg * SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + if (MF->getSubtarget().getTargetLowering()->supportSwiftError() && + MF->getFunction()->getAttributes().hasAttrSomewhere( + Attribute::SwiftError)) + return CSR_SystemZ_SwiftError_SaveList; return CSR_SystemZ_SaveList; } const uint32_t * SystemZRegisterInfo::getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const { + if (MF.getSubtarget().getTargetLowering()->supportSwiftError() && + MF.getFunction()->getAttributes().hasAttrSomewhere( + Attribute::SwiftError)) + return CSR_SystemZ_SwiftError_RegMask; return CSR_SystemZ_RegMask; } @@ -84,8 +92,14 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, // accepts the offset exists. unsigned Opcode = MI->getOpcode(); unsigned OpcodeForOffset = TII->getOpcodeForOffset(Opcode, Offset); - if (OpcodeForOffset) + if (OpcodeForOffset) { + if (OpcodeForOffset == SystemZ::LE && + MF.getSubtarget<SystemZSubtarget>().hasVector()) { + // If LE is ok for offset, use LDE instead on z13. + OpcodeForOffset = SystemZ::LDE32; + } MI->getOperand(FIOperandNum).ChangeToRegister(BasePtr, false); + } else { // Create an anchor point that is in range. Start at 0xffff so that // can use LLILH to load the immediate. diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h index a0db5a9c188f..e41c06c98af2 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -33,6 +33,15 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo { public: SystemZRegisterInfo(); + /// getPointerRegClass - Return the register class to use to hold pointers. + /// This is currently only used by LOAD_STACK_GUARD, which requires a non-%r0 + /// register, hence ADDR64. 
+ const TargetRegisterClass * + getPointerRegClass(const MachineFunction &MF, + unsigned Kind=0) const override { + return &SystemZ::ADDR64BitRegClass; + } + // Override TargetRegisterInfo.h. bool requiresRegisterScavenging(const MachineFunction &MF) const override { return true; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp index 178aa3817311..657482504045 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -23,7 +23,7 @@ using namespace llvm; // address Dest. Sequence is the opcode to use for straight-line code // (such as MVC) and Loop is the opcode to use for loops (such as MVC_LOOP). // Return the chain for the completed operation. -static SDValue emitMemMem(SelectionDAG &DAG, SDLoc DL, unsigned Sequence, +static SDValue emitMemMem(SelectionDAG &DAG, const SDLoc &DL, unsigned Sequence, unsigned Loop, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size) { EVT PtrVT = Src.getValueType(); @@ -46,12 +46,10 @@ static SDValue emitMemMem(SelectionDAG &DAG, SDLoc DL, unsigned Sequence, DAG.getConstant(Size, DL, PtrVT)); } -SDValue SystemZSelectionDAGInfo:: -EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, - SDValue Dst, SDValue Src, SDValue Size, unsigned Align, - bool IsVolatile, bool AlwaysInline, - MachinePointerInfo DstPtrInfo, - MachinePointerInfo SrcPtrInfo) const { +SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemcpy( + SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, bool IsVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { if (IsVolatile) return SDValue(); @@ -64,24 +62,21 @@ EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, // Handle a memset of 1, 2, 4 or 8 bytes with the operands given by // Chain, Dst, ByteVal and Size. These cases are expected to use // MVI, MVHHI, MVHI and MVGHI respectively. -static SDValue memsetStore(SelectionDAG &DAG, SDLoc DL, SDValue Chain, +static SDValue memsetStore(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, uint64_t ByteVal, uint64_t Size, - unsigned Align, - MachinePointerInfo DstPtrInfo) { + unsigned Align, MachinePointerInfo DstPtrInfo) { uint64_t StoreVal = ByteVal; for (unsigned I = 1; I < Size; ++I) StoreVal |= ByteVal << (I * 8); - return DAG.getStore(Chain, DL, - DAG.getConstant(StoreVal, DL, - MVT::getIntegerVT(Size * 8)), - Dst, DstPtrInfo, false, false, Align); + return DAG.getStore( + Chain, DL, DAG.getConstant(StoreVal, DL, MVT::getIntegerVT(Size * 8)), + Dst, DstPtrInfo, Align); } -SDValue SystemZSelectionDAGInfo:: -EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain, - SDValue Dst, SDValue Byte, SDValue Size, - unsigned Align, bool IsVolatile, - MachinePointerInfo DstPtrInfo) const { +SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemset( + SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, + SDValue Byte, SDValue Size, unsigned Align, bool IsVolatile, + MachinePointerInfo DstPtrInfo) const { EVT PtrVT = Dst.getValueType(); if (IsVolatile) @@ -116,15 +111,14 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain, } else { // Handle one and two bytes using STC. 
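For constant memsets of 1, 2, 4 or 8 bytes with a known fill value, memsetStore above folds the replicated byte into one immediate store (MVI/MVHHI/MVHI/MVGHI, per the earlier comment); the STC path that follows handles a non-constant fill byte for lengths 1 and 2. A sketch of the replication arithmetic:

    #include <cstdint>

    uint64_t replicateByte(uint64_t ByteVal, unsigned Size) {
      uint64_t StoreVal = ByteVal;
      for (unsigned I = 1; I < Size; ++I)
        StoreVal |= ByteVal << (I * 8);   // e.g. (0x5A, 4) -> 0x5A5A5A5A
      return StoreVal;
    }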
if (Bytes <= 2) { - SDValue Chain1 = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, - false, false, Align); + SDValue Chain1 = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, Align); if (Bytes == 1) return Chain1; SDValue Dst2 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, DAG.getConstant(1, DL, PtrVT)); - SDValue Chain2 = DAG.getStore(Chain, DL, Byte, Dst2, - DstPtrInfo.getWithOffset(1), - false, false, 1); + SDValue Chain2 = + DAG.getStore(Chain, DL, Byte, Dst2, DstPtrInfo.getWithOffset(1), + /* Alignment = */ 1); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2); } } @@ -138,8 +132,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain, // Copy the byte to the first location and then use MVC to copy // it to the rest. - Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, - false, false, Align); + Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, Align); SDValue DstPlus1 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, DAG.getConstant(1, DL, PtrVT)); return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP, @@ -150,7 +143,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain, // Use CLC to compare [Src1, Src1 + Size) with [Src2, Src2 + Size), // deciding whether to use a loop or straight-line code. -static SDValue emitCLC(SelectionDAG &DAG, SDLoc DL, SDValue Chain, +static SDValue emitCLC(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src1, SDValue Src2, uint64_t Size) { SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); EVT PtrVT = Src1.getValueType(); @@ -174,7 +167,8 @@ static SDValue emitCLC(SelectionDAG &DAG, SDLoc DL, SDValue Chain, // less than zero if CC == 1 and greater than zero if CC >= 2. // The sequence starts with IPM, which puts CC into bits 29 and 28 // of an integer and clears bits 30 and 31. -static SDValue addIPMSequence(SDLoc DL, SDValue Glue, SelectionDAG &DAG) { +static SDValue addIPMSequence(const SDLoc &DL, SDValue Glue, + SelectionDAG &DAG) { SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue); SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32)); @@ -183,11 +177,10 @@ static SDValue addIPMSequence(SDLoc DL, SDValue Glue, SelectionDAG &DAG) { return ROTL; } -std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: -EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, - SDValue Src1, SDValue Src2, SDValue Size, - MachinePointerInfo Op1PtrInfo, - MachinePointerInfo Op2PtrInfo) const { +std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemcmp( + SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src1, + SDValue Src2, SDValue Size, MachinePointerInfo Op1PtrInfo, + MachinePointerInfo Op2PtrInfo) const { if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) { uint64_t Bytes = CSize->getZExtValue(); assert(Bytes > 0 && "Caller should have handled 0-size case"); @@ -198,10 +191,9 @@ EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, return std::make_pair(SDValue(), SDValue()); } -std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: -EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain, - SDValue Src, SDValue Char, SDValue Length, - MachinePointerInfo SrcPtrInfo) const { +std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemchr( + SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src, + SDValue Char, SDValue Length, MachinePointerInfo SrcPtrInfo) const { // Use SRST to find the character. End is its address on success. 
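A brief sketch of the contract the SRST-based lowering below relies on: SEARCH STRING scans the range for the character and either stops at the match or runs into the limit, with the outcome reported in the condition code. Reference semantics (CC values per the z/Architecture definition, stated here from memory):

    #include <cstddef>

    const char *memchrRef(const char *Src, char Ch, size_t Len) {
      for (const char *P = Src; P != Src + Len; ++P)
        if (*P == Ch)
          return P;     // SRST found it: End = P, CC == 1
      return nullptr;   // limit reached without a match: CC == 2
    }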
EVT PtrVT = Src.getValueType(); SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other, MVT::Glue); @@ -226,22 +218,20 @@ EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain, return std::make_pair(End, Chain); } -std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: -EmitTargetCodeForStrcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, - SDValue Dest, SDValue Src, - MachinePointerInfo DestPtrInfo, - MachinePointerInfo SrcPtrInfo, bool isStpcpy) const { +std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForStrcpy( + SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dest, + SDValue Src, MachinePointerInfo DestPtrInfo, MachinePointerInfo SrcPtrInfo, + bool isStpcpy) const { SDVTList VTs = DAG.getVTList(Dest.getValueType(), MVT::Other); SDValue EndDest = DAG.getNode(SystemZISD::STPCPY, DL, VTs, Chain, Dest, Src, DAG.getConstant(0, DL, MVT::i32)); return std::make_pair(isStpcpy ? EndDest : Dest, EndDest.getValue(1)); } -std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: -EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, - SDValue Src1, SDValue Src2, - MachinePointerInfo Op1PtrInfo, - MachinePointerInfo Op2PtrInfo) const { +std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForStrcmp( + SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src1, + SDValue Src2, MachinePointerInfo Op1PtrInfo, + MachinePointerInfo Op2PtrInfo) const { SDVTList VTs = DAG.getVTList(Src1.getValueType(), MVT::Other, MVT::Glue); SDValue Unused = DAG.getNode(SystemZISD::STRCMP, DL, VTs, Chain, Src1, Src2, DAG.getConstant(0, DL, MVT::i32)); @@ -255,7 +245,8 @@ EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, // and the second being the out chain. // // This can be used for strlen by setting Limit to 0. 
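getBoundedStrlen below implements both strlen and strnlen with the same SRST search for the terminating NUL: strnlen passes Limit = Src + MaxLength, while strlen passes 0 so the search is effectively unbounded (the NUL is guaranteed to stop it first). A reference sketch of the computation:

    #include <cstddef>

    size_t boundedLen(const char *Src, const char *Limit) {
      const char *End = Src;
      while (End != Limit && *End != '\0')  // with Limit == nullptr this
        ++End;                              // terminates via the NUL only
      return (size_t)(End - Src);           // Len = End - Src, as below
    }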
-static std::pair<SDValue, SDValue> getBoundedStrlen(SelectionDAG &DAG, SDLoc DL, +static std::pair<SDValue, SDValue> getBoundedStrlen(SelectionDAG &DAG, + const SDLoc &DL, SDValue Chain, SDValue Src, SDValue Limit) { EVT PtrVT = Src.getValueType(); @@ -265,19 +256,18 @@ static std::pair<SDValue, SDValue> getBoundedStrlen(SelectionDAG &DAG, SDLoc DL, Chain = End.getValue(1); SDValue Len = DAG.getNode(ISD::SUB, DL, PtrVT, End, Src); return std::make_pair(Len, Chain); -} +} -std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: -EmitTargetCodeForStrlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain, - SDValue Src, MachinePointerInfo SrcPtrInfo) const { +std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForStrlen( + SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src, + MachinePointerInfo SrcPtrInfo) const { EVT PtrVT = Src.getValueType(); return getBoundedStrlen(DAG, DL, Chain, Src, DAG.getConstant(0, DL, PtrVT)); } -std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: -EmitTargetCodeForStrnlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain, - SDValue Src, SDValue MaxLength, - MachinePointerInfo SrcPtrInfo) const { +std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForStrnlen( + SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src, + SDValue MaxLength, MachinePointerInfo SrcPtrInfo) const { EVT PtrVT = Src.getValueType(); MaxLength = DAG.getZExtOrTrunc(MaxLength, DL, PtrVT); SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, MaxLength); diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h index 246fa3e5e656..93cd970c30c6 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h @@ -7,66 +7,64 @@ // //===----------------------------------------------------------------------===// // -// This file defines the SystemZ subclass for TargetSelectionDAGInfo. +// This file defines the SystemZ subclass for SelectionDAGTargetInfo. 
// //===----------------------------------------------------------------------===// #ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZSELECTIONDAGINFO_H #define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZSELECTIONDAGINFO_H -#include "llvm/Target/TargetSelectionDAGInfo.h" +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" namespace llvm { class SystemZTargetMachine; -class SystemZSelectionDAGInfo : public TargetSelectionDAGInfo { +class SystemZSelectionDAGInfo : public SelectionDAGTargetInfo { public: explicit SystemZSelectionDAGInfo() = default; - SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, - SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - bool IsVolatile, bool AlwaysInline, + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &DL, + SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, bool IsVolatile, + bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override; - SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, + SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Byte, SDValue Size, unsigned Align, bool IsVolatile, MachinePointerInfo DstPtrInfo) const override; std::pair<SDValue, SDValue> - EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + EmitTargetCodeForMemcmp(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src1, SDValue Src2, SDValue Size, MachinePointerInfo Op1PtrInfo, MachinePointerInfo Op2PtrInfo) const override; std::pair<SDValue, SDValue> - EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + EmitTargetCodeForMemchr(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src, SDValue Char, SDValue Length, MachinePointerInfo SrcPtrInfo) const override; - std::pair<SDValue, SDValue> - EmitTargetCodeForStrcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, - SDValue Dest, SDValue Src, - MachinePointerInfo DestPtrInfo, - MachinePointerInfo SrcPtrInfo, - bool isStpcpy) const override; + std::pair<SDValue, SDValue> EmitTargetCodeForStrcpy( + SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dest, + SDValue Src, MachinePointerInfo DestPtrInfo, + MachinePointerInfo SrcPtrInfo, bool isStpcpy) const override; std::pair<SDValue, SDValue> - EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + EmitTargetCodeForStrcmp(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src1, SDValue Src2, MachinePointerInfo Op1PtrInfo, MachinePointerInfo Op2PtrInfo) const override; std::pair<SDValue, SDValue> - EmitTargetCodeForStrlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + EmitTargetCodeForStrlen(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src, MachinePointerInfo SrcPtrInfo) const override; std::pair<SDValue, SDValue> - EmitTargetCodeForStrnlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + EmitTargetCodeForStrnlen(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src, SDValue MaxLength, MachinePointerInfo SrcPtrInfo) const override; }; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp index 846edd51341a..7f26a3519e50 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp @@ -35,6 +35,10 @@ public: bool processBlock(MachineBasicBlock &MBB); bool runOnMachineFunction(MachineFunction &F) override; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( 
+ MachineFunctionProperties::Property::AllVRegsAllocated); + } private: bool shortenIIF(MachineInstr &MI, unsigned LLIxL, unsigned LLIxH); @@ -68,18 +72,20 @@ static void tieOpsIfNeeded(MachineInstr &MI) { // MI loads one word of a GPR using an IIxF instruction and LLIxL and LLIxH // are the halfword immediate loads for the same word. Try to use one of them -// instead of IIxF. -bool SystemZShortenInst::shortenIIF(MachineInstr &MI, - unsigned LLIxL, unsigned LLIxH) { +// instead of IIxF. +bool SystemZShortenInst::shortenIIF(MachineInstr &MI, unsigned LLIxL, + unsigned LLIxH) { unsigned Reg = MI.getOperand(0).getReg(); // The new opcode will clear the other half of the GR64 reg, so // cancel if that is live. - unsigned thisSubRegIdx = (SystemZ::GRH32BitRegClass.contains(Reg) ? - SystemZ::subreg_h32 : SystemZ::subreg_l32); - unsigned otherSubRegIdx = (thisSubRegIdx == SystemZ::subreg_l32 ? - SystemZ::subreg_h32 : SystemZ::subreg_l32); - unsigned GR64BitReg = TRI->getMatchingSuperReg(Reg, thisSubRegIdx, - &SystemZ::GR64BitRegClass); + unsigned thisSubRegIdx = + (SystemZ::GRH32BitRegClass.contains(Reg) ? SystemZ::subreg_h32 + : SystemZ::subreg_l32); + unsigned otherSubRegIdx = + (thisSubRegIdx == SystemZ::subreg_l32 ? SystemZ::subreg_h32 + : SystemZ::subreg_l32); + unsigned GR64BitReg = + TRI->getMatchingSuperReg(Reg, thisSubRegIdx, &SystemZ::GR64BitRegClass); unsigned OtherReg = TRI->getSubReg(GR64BitReg, otherSubRegIdx); if (LiveRegs.contains(OtherReg)) return false; @@ -135,11 +141,10 @@ bool SystemZShortenInst::shortenOn001(MachineInstr &MI, unsigned Opcode) { // Calls shortenOn001 if CCLive is false. CC def operand is added in // case of success. -bool SystemZShortenInst::shortenOn001AddCC(MachineInstr &MI, - unsigned Opcode) { +bool SystemZShortenInst::shortenOn001AddCC(MachineInstr &MI, unsigned Opcode) { if (!LiveRegs.contains(SystemZ::CC) && shortenOn001(MI, Opcode)) { MachineInstrBuilder(*MI.getParent()->getParent(), &MI) - .addReg(SystemZ::CC, RegState::ImplicitDefine); + .addReg(SystemZ::CC, RegState::ImplicitDefine | RegState::Dead); return true; } return false; @@ -177,7 +182,7 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) { // Set up the set of live registers at the end of MBB (live out) LiveRegs.clear(); - LiveRegs.addLiveOuts(&MBB); + LiveRegs.addLiveOuts(MBB); // Iterate backwards through the block looking for instructions to change. 
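SystemZShortenInst scans each block backwards while maintaining a LivePhysRegs set, so every candidate is examined with accurate liveness for CC and for the other half of the GR64 pair (see shortenIIF/shortenOn001AddCC above). A sketch of the idiom, assuming LiveRegs was initialized with the target register info and that the loop body calls stepBackward as the pass does:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/CodeGen/LivePhysRegs.h"
    #include "llvm/CodeGen/MachineBasicBlock.h"
    using namespace llvm;

    static void scanBlock(MachineBasicBlock &MBB, LivePhysRegs &LiveRegs) {
      LiveRegs.addLiveOuts(MBB);       // seed with live-outs (new MBB& form)
      for (MachineInstr &MI : llvm::reverse(MBB)) {
        // ... inspect/rewrite MI while LiveRegs describes liveness below it ...
        LiveRegs.stepBackward(MI);     // account for MI's defs and uses
      }
    }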
for (auto MBBI = MBB.rbegin(), MBBE = MBB.rend(); MBBI != MBBE; ++MBBI) { @@ -264,6 +269,9 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) { } bool SystemZShortenInst::runOnMachineFunction(MachineFunction &F) { + if (skipFunction(*F.getFunction())) + return false; + const SystemZSubtarget &ST = F.getSubtarget<SystemZSubtarget>(); TII = ST.getInstrInfo(); TRI = ST.getRegisterInfo(); diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp index 0b49fcdd8f78..67d5e0179fe2 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp @@ -40,21 +40,11 @@ SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU, HasPopulationCount(false), HasFastSerialization(false), HasInterlockedAccess1(false), HasMiscellaneousExtensions(false), HasTransactionalExecution(false), HasProcessorAssist(false), - HasVector(false), TargetTriple(TT), + HasVector(false), HasLoadStoreOnCond2(false), TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), TSInfo(), FrameLowering() {} -// Return true if GV binds locally under reloc model RM. -static bool bindsLocally(const GlobalValue *GV, Reloc::Model RM) { - // For non-PIC, all symbols bind locally. - if (RM == Reloc::Static) - return true; - - return GV->hasLocalLinkage() || !GV->hasDefaultVisibility(); -} - bool SystemZSubtarget::isPC32DBLSymbol(const GlobalValue *GV, - Reloc::Model RM, CodeModel::Model CM) const { // PC32DBL accesses require the low bit to be clear. Note that a zero // value selects the default alignment and is therefore OK. @@ -63,7 +53,7 @@ bool SystemZSubtarget::isPC32DBLSymbol(const GlobalValue *GV, // For the small model, all locally-binding symbols are in range. if (CM == CodeModel::Small) - return bindsLocally(GV, RM); + return TLInfo.getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV); // For Medium and above, assume that the symbol is not within the 4GB range. // Taking the address of locally-defined text would be OK, but that diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h index f7eaf01cb77e..6007f6fc9c4c 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h @@ -45,6 +45,7 @@ protected: bool HasTransactionalExecution; bool HasProcessorAssist; bool HasVector; + bool HasLoadStoreOnCond2; private: Triple TargetTriple; @@ -69,7 +70,7 @@ public: const SystemZTargetLowering *getTargetLowering() const override { return &TLInfo; } - const TargetSelectionDAGInfo *getSelectionDAGInfo() const override { + const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { return &TSInfo; } @@ -85,6 +86,9 @@ public: // Return true if the target has the load/store-on-condition facility. bool hasLoadStoreOnCond() const { return HasLoadStoreOnCond; } + // Return true if the target has the load/store-on-condition facility 2. + bool hasLoadStoreOnCond2() const { return HasLoadStoreOnCond2; } + // Return true if the target has the high-word facility. bool hasHighWord() const { return HasHighWord; } @@ -116,8 +120,7 @@ public: // Return true if GV can be accessed using LARL for reloc model RM // and code model CM. 
- bool isPC32DBLSymbol(const GlobalValue *GV, Reloc::Model RM, - CodeModel::Model CM) const; + bool isPC32DBLSymbol(const GlobalValue *GV, CodeModel::Model CM) const; bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } }; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTDC.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTDC.cpp new file mode 100644 index 000000000000..96a9ef82c125 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZTDC.cpp @@ -0,0 +1,382 @@ +//===-- SystemZTDC.cpp - Utilize Test Data Class instruction --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass looks for instructions that can be replaced by a Test Data Class +// instruction, and replaces them when profitable. +// +// Roughly, the following rules are recognized: +// +// 1: fcmp pred X, 0 -> tdc X, mask +// 2: fcmp pred X, +-inf -> tdc X, mask +// 3: fcmp pred X, +-minnorm -> tdc X, mask +// 4: tdc (fabs X), mask -> tdc X, newmask +// 5: icmp slt (bitcast float X to int), 0 -> tdc X, mask [i.e. signbit] +// 6: icmp sgt (bitcast float X to int), -1 -> tdc X, mask +// 7: icmp ne/eq (call @llvm.s390.tdc.*(X, mask)) -> tdc X, mask/~mask +// 8: and i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 & M2) +// 9: or i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 | M2) +// 10: xor i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 ^ M2) +// +// The pass works in 4 steps: +// +// 1. All fcmp and icmp instructions in a function are checked for a match +// with rules 1-3 and 5-7. Their TDC equivalents are stored in +// the ConvertedInsts mapping. If the operand of an fcmp instruction is +// a fabs, it's also folded according to rule 4. +// 2. All and/or/xor i1 instructions both of whose operands have already been +// mapped are mapped according to rules 8-10. LogicOpsWorklist is used +// as a queue of instructions to check. +// 3. All mapped instructions that are considered worthy of conversion (i.e. +// replacing them will actually simplify the final code) are replaced +// with a call to the s390.tdc intrinsic. +// 4. All intermediate results of replaced instructions are removed if unused. +// +// Instructions that match rules 1-3 are considered unworthy of conversion +// on their own (since a comparison instruction is superior), but are mapped +// in the hopes of folding the result using rules 4 and 8-10 (likely removing +// the original comparison in the process). +// +//===----------------------------------------------------------------------===// + +#include "SystemZ.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include <deque> +#include <set> + +using namespace llvm; + +namespace llvm { + void initializeSystemZTDCPassPass(PassRegistry&); +} + +namespace { + +class SystemZTDCPass : public FunctionPass { +public: + static char ID; + SystemZTDCPass() : FunctionPass(ID) { + initializeSystemZTDCPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; +private: + // Maps seen instructions that can be mapped to a TDC, values are + // (TDC operand, TDC mask, worthy flag) triples.
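Concretely, for the hypothetical function below, both compares get ConvertedInsts entries with operand X and a class mask (rules 1 and 2 from the header comment), and rule 9 then merges them so the '||' becomes a single, worthy tdc call:

    bool zeroOrPosInf(double X) {
      // X == 0.0 matches rule 1, X == +inf matches rule 2; both tests are
      // of the same operand X, so rule 9 folds the '||' into one TDC.
      return X == 0.0 || X == __builtin_inf();
    }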
+ MapVector<Instruction *, std::tuple<Value *, int, bool>> ConvertedInsts; + // The queue of and/or/xor i1 instructions to be potentially folded. + std::vector<BinaryOperator *> LogicOpsWorklist; + // Instructions matched while folding, to be removed at the end if unused. + std::set<Instruction *> PossibleJunk; + + // Tries to convert an fcmp instruction. + void convertFCmp(CmpInst &I); + + // Tries to convert an icmp instruction. + void convertICmp(CmpInst &I); + + // Tries to convert an i1 and/or/xor instruction, both of whose operands + // have already been converted. + void convertLogicOp(BinaryOperator &I); + + // Marks an instruction as converted - adds it to ConvertedInsts and adds + // any and/or/xor i1 users to the queue. + void converted(Instruction *I, Value *V, int Mask, bool Worthy) { + ConvertedInsts[I] = std::make_tuple(V, Mask, Worthy); + auto &M = *I->getFunction()->getParent(); + auto &Ctx = M.getContext(); + for (auto *U : I->users()) { + auto *LI = dyn_cast<BinaryOperator>(U); + if (LI && LI->getType() == Type::getInt1Ty(Ctx) && + (LI->getOpcode() == Instruction::And || + LI->getOpcode() == Instruction::Or || + LI->getOpcode() == Instruction::Xor)) { + LogicOpsWorklist.push_back(LI); + } + } + } +}; + +} // end anonymous namespace + +char SystemZTDCPass::ID = 0; +INITIALIZE_PASS(SystemZTDCPass, "systemz-tdc", + "SystemZ Test Data Class optimization", false, false) + +FunctionPass *llvm::createSystemZTDCPass() { + return new SystemZTDCPass(); +} + +void SystemZTDCPass::convertFCmp(CmpInst &I) { + Value *Op0 = I.getOperand(0); + auto *Const = dyn_cast<ConstantFP>(I.getOperand(1)); + auto Pred = I.getPredicate(); + // Only comparisons with consts are interesting. + if (!Const) + return; + // Compute the smallest normal number (and its negation). + auto &Sem = Op0->getType()->getFltSemantics(); + APFloat Smallest = APFloat::getSmallestNormalized(Sem); + APFloat NegSmallest = Smallest; + NegSmallest.changeSign(); + // Check if Const is one of our recognized consts. + int WhichConst; + if (Const->isZero()) { + // All comparisons with 0 can be converted. + WhichConst = 0; + } else if (Const->isInfinity()) { + // Likewise for infinities. + WhichConst = Const->isNegative() ? 2 : 1; + } else if (Const->isExactlyValue(Smallest)) { + // For Smallest, we cannot do EQ separately from GT. + if ((Pred & CmpInst::FCMP_OGE) != CmpInst::FCMP_OGE && + (Pred & CmpInst::FCMP_OGE) != 0) + return; + WhichConst = 3; + } else if (Const->isExactlyValue(NegSmallest)) { + // Likewise for NegSmallest, we cannot do EQ separately from LT. + if ((Pred & CmpInst::FCMP_OLE) != CmpInst::FCMP_OLE && + (Pred & CmpInst::FCMP_OLE) != 0) + return; + WhichConst = 4; + } else { + // Not one of our special constants. + return; + } + // Partial masks to use for EQ, GT, LT, UN comparisons, respectively.
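The table that follows is indexed as [WhichConst][EQ, GT, LT, UN]; the combination code after it exploits the fact that FCmp predicate enumerators are bitmasks over those four outcomes. A worked sketch mirroring that code:

    #include "llvm/IR/InstrTypes.h"

    int maskFor(llvm::CmpInst::Predicate Pred, const int Row[4]) {
      int Mask = 0;
      if (Pred & llvm::CmpInst::FCMP_OEQ) Mask |= Row[0];  // EQ bit set
      if (Pred & llvm::CmpInst::FCMP_OGT) Mask |= Row[1];  // GT bit set
      if (Pred & llvm::CmpInst::FCMP_OLT) Mask |= Row[2];  // LT bit set
      if (Pred & llvm::CmpInst::FCMP_UNO) Mask |= Row[3];  // UN bit set
      return Mask;  // FCMP_ULE = EQ|LT|UN picks Row[0]|Row[2]|Row[3]
    }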
+ static const int Masks[][4] = { + { // 0 + SystemZ::TDCMASK_ZERO, // eq + SystemZ::TDCMASK_POSITIVE, // gt + SystemZ::TDCMASK_NEGATIVE, // lt + SystemZ::TDCMASK_NAN, // un + }, + { // inf + SystemZ::TDCMASK_INFINITY_PLUS, // eq + 0, // gt + (SystemZ::TDCMASK_ZERO | + SystemZ::TDCMASK_NEGATIVE | + SystemZ::TDCMASK_NORMAL_PLUS | + SystemZ::TDCMASK_SUBNORMAL_PLUS), // lt + SystemZ::TDCMASK_NAN, // un + }, + { // -inf + SystemZ::TDCMASK_INFINITY_MINUS, // eq + (SystemZ::TDCMASK_ZERO | + SystemZ::TDCMASK_POSITIVE | + SystemZ::TDCMASK_NORMAL_MINUS | + SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt + 0, // lt + SystemZ::TDCMASK_NAN, // un + }, + { // minnorm + 0, // eq (unsupported) + (SystemZ::TDCMASK_NORMAL_PLUS | + SystemZ::TDCMASK_INFINITY_PLUS), // gt (actually ge) + (SystemZ::TDCMASK_ZERO | + SystemZ::TDCMASK_NEGATIVE | + SystemZ::TDCMASK_SUBNORMAL_PLUS), // lt + SystemZ::TDCMASK_NAN, // un + }, + { // -minnorm + 0, // eq (unsupported) + (SystemZ::TDCMASK_ZERO | + SystemZ::TDCMASK_POSITIVE | + SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt + (SystemZ::TDCMASK_NORMAL_MINUS | + SystemZ::TDCMASK_INFINITY_MINUS), // lt (actually le) + SystemZ::TDCMASK_NAN, // un + } + }; + // Construct the mask as a combination of the partial masks. + int Mask = 0; + if (Pred & CmpInst::FCMP_OEQ) + Mask |= Masks[WhichConst][0]; + if (Pred & CmpInst::FCMP_OGT) + Mask |= Masks[WhichConst][1]; + if (Pred & CmpInst::FCMP_OLT) + Mask |= Masks[WhichConst][2]; + if (Pred & CmpInst::FCMP_UNO) + Mask |= Masks[WhichConst][3]; + // A lone fcmp is unworthy of tdc conversion on its own, but may become + // worthy if combined with fabs. + bool Worthy = false; + if (CallInst *CI = dyn_cast<CallInst>(Op0)) { + Function *F = CI->getCalledFunction(); + if (F && F->getIntrinsicID() == Intrinsic::fabs) { + // Fold with fabs - adjust the mask appropriately. + Mask &= SystemZ::TDCMASK_PLUS; + Mask |= Mask >> 1; + Op0 = CI->getArgOperand(0); + // A combination of fcmp with fabs is a win, unless the constant + // involved is 0 (which is handled by later passes). + Worthy = WhichConst != 0; + PossibleJunk.insert(CI); + } + } + converted(&I, Op0, Mask, Worthy); +} + +void SystemZTDCPass::convertICmp(CmpInst &I) { + Value *Op0 = I.getOperand(0); + auto *Const = dyn_cast<ConstantInt>(I.getOperand(1)); + auto Pred = I.getPredicate(); + // All our icmp rules involve comparisons with consts. + if (!Const) + return; + if (auto *Cast = dyn_cast<BitCastInst>(Op0)) { + // Check for icmp+bitcast used for signbit. + if (!Cast->getSrcTy()->isFloatTy() && + !Cast->getSrcTy()->isDoubleTy() && + !Cast->getSrcTy()->isFP128Ty()) + return; + Value *V = Cast->getOperand(0); + int Mask; + if (Pred == CmpInst::ICMP_SLT && Const->isZero()) { + // icmp slt (bitcast X), 0 - set if sign bit true + Mask = SystemZ::TDCMASK_MINUS; + } else if (Pred == CmpInst::ICMP_SGT && Const->isMinusOne()) { + // icmp sgt (bitcast X), -1 - set if sign bit false + Mask = SystemZ::TDCMASK_PLUS; + } else { + // Not a sign bit check. + return; + } + PossibleJunk.insert(Cast); + converted(&I, V, Mask, true); + } else if (auto *CI = dyn_cast<CallInst>(Op0)) { + // Check if this is a pre-existing call of our tdc intrinsic. + Function *F = CI->getCalledFunction(); + if (!F || F->getIntrinsicID() != Intrinsic::s390_tdc) + return; + if (!Const->isZero()) + return; + Value *V = CI->getArgOperand(0); + auto *MaskC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); + // Bail if the mask is not a constant. 
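One detail worth spelling out from the fabs fold in convertFCmp above: TDC class-mask bits come in +/- pairs, with each negative-class bit one position below its positive sibling, which is what makes the fold a two-line mask rewrite. A sketch (convertICmp resumes below):

    int foldFabsMask(int Mask) {
      Mask &= SystemZ::TDCMASK_PLUS;  // keep the positive-class bits only
      Mask |= Mask >> 1;              // add each class's negative twin
      return Mask;  // e.g. INFINITY_PLUS -> INFINITY_PLUS | INFINITY_MINUS
    }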
+ if (!MaskC) + return; + int Mask = MaskC->getZExtValue(); + Mask &= SystemZ::TDCMASK_ALL; + if (Pred == CmpInst::ICMP_NE) { + // icmp ne (call llvm.s390.tdc(...)), 0 -> simple TDC + } else if (Pred == CmpInst::ICMP_EQ) { + // icmp eq (call llvm.s390.tdc(...)), 0 -> TDC with inverted mask + Mask ^= SystemZ::TDCMASK_ALL; + } else { + // An unknown comparison - ignore. + return; + } + PossibleJunk.insert(CI); + converted(&I, V, Mask, false); + } +} + +void SystemZTDCPass::convertLogicOp(BinaryOperator &I) { + Value *Op0, *Op1; + int Mask0, Mask1; + bool Worthy0, Worthy1; + std::tie(Op0, Mask0, Worthy0) = ConvertedInsts[cast<Instruction>(I.getOperand(0))]; + std::tie(Op1, Mask1, Worthy1) = ConvertedInsts[cast<Instruction>(I.getOperand(1))]; + if (Op0 != Op1) + return; + int Mask; + switch (I.getOpcode()) { + case Instruction::And: + Mask = Mask0 & Mask1; + break; + case Instruction::Or: + Mask = Mask0 | Mask1; + break; + case Instruction::Xor: + Mask = Mask0 ^ Mask1; + break; + default: + llvm_unreachable("Unknown op in convertLogicOp"); + } + converted(&I, Op0, Mask, true); +} + +bool SystemZTDCPass::runOnFunction(Function &F) { + ConvertedInsts.clear(); + LogicOpsWorklist.clear(); + PossibleJunk.clear(); + + // Look for icmp+fcmp instructions. + for (auto &I : instructions(F)) { + if (I.getOpcode() == Instruction::FCmp) + convertFCmp(cast<CmpInst>(I)); + else if (I.getOpcode() == Instruction::ICmp) + convertICmp(cast<CmpInst>(I)); + } + + // If none found, bail already. + if (ConvertedInsts.empty()) + return false; + + // Process the queue of logic instructions. + while (!LogicOpsWorklist.empty()) { + BinaryOperator *Op = LogicOpsWorklist.back(); + LogicOpsWorklist.pop_back(); + // If both operands mapped, and the instruction itself not yet mapped, + // convert it. + if (ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(0))) && + ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(1))) && + !ConvertedInsts.count(Op)) + convertLogicOp(*Op); + } + + // Time to actually replace the instructions. Do it in the reverse order + // of finding them, since there's a good chance the earlier ones will be + // unused (due to being folded into later ones). + Module &M = *F.getParent(); + auto &Ctx = M.getContext(); + Value *Zero32 = ConstantInt::get(Type::getInt32Ty(Ctx), 0); + bool MadeChange = false; + for (auto &It : reverse(ConvertedInsts)) { + Instruction *I = It.first; + Value *V; + int Mask; + bool Worthy; + std::tie(V, Mask, Worthy) = It.second; + if (!I->user_empty()) { + // If used and unworthy of conversion, skip it. + if (!Worthy) + continue; + // Call the intrinsic, compare result with 0. + Value *TDCFunc = Intrinsic::getDeclaration(&M, Intrinsic::s390_tdc, + V->getType()); + IRBuilder<> IRB(I); + Value *MaskVal = ConstantInt::get(Type::getInt64Ty(Ctx), Mask); + Instruction *TDC = IRB.CreateCall(TDCFunc, {V, MaskVal}); + Value *ICmp = IRB.CreateICmp(CmpInst::ICMP_NE, TDC, Zero32); + I->replaceAllUsesWith(ICmp); + } + // If unused, or used and converted, remove it. + I->eraseFromParent(); + MadeChange = true; + } + + if (!MadeChange) + return false; + + // We've actually done something - now clear misc accumulated junk (fabs, + // bitcast). 
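Also from convertICmp above, the rule-7 inversion is plain mask arithmetic: testing a tdc result for equality with 0 is the complement of testing it for inequality, so the mask is XORed with the all-classes mask. A sketch, assuming TDCMASK_ALL covers all twelve SystemZ class bits:

    int invertTdcMask(int Mask) {
      return Mask ^ SystemZ::TDCMASK_ALL;  // e.g. 0x0f0 -> 0xf0f if ALL == 0xfff
    }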
+ for (auto *I : PossibleJunk) + if (I->user_empty()) + I->eraseFromParent(); + + return true; +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp index f305e85f6cfe..85a3f6f4a8be 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -10,6 +10,7 @@ #include "SystemZTargetMachine.h" #include "SystemZTargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Transforms/Scalar.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" @@ -79,13 +80,22 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU, return Ret; } +static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) { + // Static code is suitable for use in a dynamic executable; there is no + // separate DynamicNoPIC model. + if (!RM.hasValue() || *RM == Reloc::DynamicNoPIC) + return Reloc::Static; + return *RM; +} + SystemZTargetMachine::SystemZTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, + Optional<Reloc::Model> RM, + CodeModel::Model CM, CodeGenOpt::Level OL) : LLVMTargetMachine(T, computeDataLayout(TT, CPU, FS), TT, CPU, FS, Options, - RM, CM, OL), + getEffectiveRelocModel(RM), CM, OL), TLOF(make_unique<TargetLoweringObjectFileELF>()), Subtarget(TT, CPU, FS, *this) { initAsmInfo(); @@ -112,6 +122,9 @@ public: } // end anonymous namespace void SystemZPassConfig::addIRPasses() { + if (getOptLevel() != CodeGenOpt::None) + addPass(createSystemZTDCPass()); + TargetPassConfig::addIRPasses(); } @@ -125,8 +138,7 @@ bool SystemZPassConfig::addInstSelector() { } void SystemZPassConfig::addPreSched2() { - if (getOptLevel() != CodeGenOpt::None && - getSystemZTargetMachine().getSubtargetImpl()->hasLoadStoreOnCond()) + if (getOptLevel() != CodeGenOpt::None) addPass(&IfConverterID); } diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h index 1a8f1f7f3aaa..69cf9bc6e525 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h @@ -29,7 +29,7 @@ class SystemZTargetMachine : public LLVMTargetMachine { public: SystemZTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, + Optional<Reloc::Model> RM, CodeModel::Model CM, CodeGenOpt::Level OL); ~SystemZTargetMachine() override; |