diff options
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 1 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARM.td | 30 | ||||
-rw-r--r-- | llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 363 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/P9InstrResources.td | 42 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstr64Bit.td | 44 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 6 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstrInfo.td | 110 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Target/RISCV/RISCVRegisterInfo.h | 6 | ||||
-rw-r--r-- | llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td | 7 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86FrameLowering.cpp | 44 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86FrameLowering.h | 2 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 16 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 3 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86MachineFunctionInfo.h | 8 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86RegisterInfo.cpp | 19 |
18 files changed, 460 insertions, 252 deletions
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp index d0a544273b8b..1a16468484ad 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp @@ -172,7 +172,8 @@ void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O, int ImmS = MI->getOperand(4).getImm(); if ((Op2.getReg() == AArch64::WZR || Op2.getReg() == AArch64::XZR) && - (ImmR == 0 || ImmS < ImmR)) { + (ImmR == 0 || ImmS < ImmR) && + STI.getFeatureBits()[AArch64::HasV8_2aOps]) { // BFC takes precedence over its entire range, sligtly differently to BFI. int BitWidth = Opcode == AArch64::BFMXri ? 64 : 32; int LSB = (BitWidth - ImmR) % BitWidth; diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 885239e2faed..4e8773203afb 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -952,6 +952,7 @@ bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) { unsigned SDSTName; switch (MI->getOpcode()) { case AMDGPU::V_READLANE_B32: + case AMDGPU::V_READLANE_B32_gfx10: case AMDGPU::V_READFIRSTLANE_B32: SDSTName = AMDGPU::OpName::vdst; break; diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td index b687db12eaf5..c575e84800f8 100644 --- a/llvm/lib/Target/ARM/ARM.td +++ b/llvm/lib/Target/ARM/ARM.td @@ -57,12 +57,15 @@ def FeatureD32 : SubtargetFeature<"d32", "HasD32", "true", "Extend FP to 32 double registers">; multiclass VFPver<string name, string query, string description, - list<SubtargetFeature> prev = [], - list<SubtargetFeature> otherimplies = []> { + list<SubtargetFeature> prev, + list<SubtargetFeature> otherimplies, + list<SubtargetFeature> vfp2prev = []> { def _D16_SP: SubtargetFeature< name#"d16sp", query#"D16SP", "true", description#" with only 16 d-registers and no double precision", - !foreach(v, prev, !cast<SubtargetFeature>(v # "_D16_SP")) # otherimplies>; + !foreach(v, prev, !cast<SubtargetFeature>(v # "_D16_SP")) # + !foreach(v, vfp2prev, !cast<SubtargetFeature>(v # "_SP")) # + otherimplies>; def _SP: SubtargetFeature< name#"sp", query#"SP", "true", description#" with no double precision", @@ -72,6 +75,7 @@ multiclass VFPver<string name, string query, string description, name#"d16", query#"D16", "true", description#" with only 16 d-registers", !foreach(v, prev, !cast<SubtargetFeature>(v # "_D16")) # + vfp2prev # otherimplies # [FeatureFP64, !cast<SubtargetFeature>(NAME # "_D16_SP")]>; def "": SubtargetFeature< name, query, "true", description, @@ -80,11 +84,23 @@ multiclass VFPver<string name, string query, string description, !cast<SubtargetFeature>(NAME # "_SP")]>; } -defm FeatureVFP2: VFPver<"vfp2", "HasVFPv2", "Enable VFP2 instructions", - [], [FeatureFPRegs]>; +def FeatureVFP2_D16_SP : SubtargetFeature<"vfp2d16sp", "HasVFPv2D16SP", "true", + "Enable VFP2 instructions with " + "no double precision", + [FeatureFPRegs]>; +def FeatureVFP2_SP : SubtargetFeature<"vfp2sp", "HasVFPv2SP", "true", + "Enable VFP2 instructions with " + "no double precision", + [FeatureVFP2_D16_SP]>; +def FeatureVFP2_D16 : SubtargetFeature<"vfp2d16", "HasVFPv2D16", "true", + "Enable VFP2 instructions", + [FeatureFP64, FeatureVFP2_D16_SP]>; +def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true", + "Enable VFP2 instructions", + [FeatureVFP2_D16, FeatureVFP2_SP]>; defm FeatureVFP3: VFPver<"vfp3", "HasVFPv3", "Enable VFP3 instructions", - [FeatureVFP2]>; + [], [], [FeatureVFP2]>; def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", "Enable NEON instructions", @@ -98,7 +114,7 @@ defm FeatureVFP4: VFPver<"vfp4", "HasVFPv4", "Enable VFP4 instructions", [FeatureVFP3], [FeatureFP16]>; defm FeatureFPARMv8: VFPver<"fp-armv8", "HasFPARMv8", "Enable ARMv8 FP", - [FeatureVFP4]>; + [FeatureVFP4], []>; def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true", "Enable full half-precision " diff --git a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index aee434cb0067..fd867acd234f 100644 --- a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -233,9 +233,14 @@ class MipsAsmParser : public MCTargetAsmParser { bool expandLoadImm(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc, MCStreamer &Out, const MCSubtargetInfo *STI); - bool expandLoadImmReal(MCInst &Inst, bool IsSingle, bool IsGPR, bool Is64FPU, - SMLoc IDLoc, MCStreamer &Out, - const MCSubtargetInfo *STI); + bool expandLoadSingleImmToGPR(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, + const MCSubtargetInfo *STI); + bool expandLoadSingleImmToFPR(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, + const MCSubtargetInfo *STI); + bool expandLoadDoubleImmToGPR(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, + const MCSubtargetInfo *STI); + bool expandLoadDoubleImmToFPR(MCInst &Inst, bool Is64FPU, SMLoc IDLoc, + MCStreamer &Out, const MCSubtargetInfo *STI); bool expandLoadAddress(unsigned DstReg, unsigned BaseReg, const MCOperand &Offset, bool Is32BitAddress, @@ -2454,25 +2459,21 @@ MipsAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, : MER_Success; case Mips::LoadImmSingleGPR: - return expandLoadImmReal(Inst, true, true, false, IDLoc, Out, STI) - ? MER_Fail - : MER_Success; + return expandLoadSingleImmToGPR(Inst, IDLoc, Out, STI) ? MER_Fail + : MER_Success; case Mips::LoadImmSingleFGR: - return expandLoadImmReal(Inst, true, false, false, IDLoc, Out, STI) - ? MER_Fail - : MER_Success; + return expandLoadSingleImmToFPR(Inst, IDLoc, Out, STI) ? MER_Fail + : MER_Success; case Mips::LoadImmDoubleGPR: - return expandLoadImmReal(Inst, false, true, false, IDLoc, Out, STI) - ? MER_Fail - : MER_Success; + return expandLoadDoubleImmToGPR(Inst, IDLoc, Out, STI) ? MER_Fail + : MER_Success; case Mips::LoadImmDoubleFGR: - return expandLoadImmReal(Inst, false, false, true, IDLoc, Out, STI) - ? MER_Fail - : MER_Success; + return expandLoadDoubleImmToFPR(Inst, true, IDLoc, Out, STI) ? MER_Fail + : MER_Success; case Mips::LoadImmDoubleFGR_32: - return expandLoadImmReal(Inst, false, false, false, IDLoc, Out, STI) - ? MER_Fail - : MER_Success; + return expandLoadDoubleImmToFPR(Inst, false, IDLoc, Out, STI) ? MER_Fail + : MER_Success; + case Mips::Ulh: return expandUlh(Inst, true, IDLoc, Out, STI) ? MER_Fail : MER_Success; case Mips::Ulhu: @@ -3289,10 +3290,45 @@ bool MipsAsmParser::emitPartialAddress(MipsTargetStreamer &TOut, SMLoc IDLoc, return false; } -bool MipsAsmParser::expandLoadImmReal(MCInst &Inst, bool IsSingle, bool IsGPR, - bool Is64FPU, SMLoc IDLoc, - MCStreamer &Out, - const MCSubtargetInfo *STI) { +static uint64_t convertIntToDoubleImm(uint64_t ImmOp64) { + // If ImmOp64 is AsmToken::Integer type (all bits set to zero in the + // exponent field), convert it to double (e.g. 1 to 1.0) + if ((Hi_32(ImmOp64) & 0x7ff00000) == 0) { + APFloat RealVal(APFloat::IEEEdouble(), ImmOp64); + ImmOp64 = RealVal.bitcastToAPInt().getZExtValue(); + } + return ImmOp64; +} + +static uint32_t covertDoubleImmToSingleImm(uint64_t ImmOp64) { + // Conversion of a double in an uint64_t to a float in a uint32_t, + // retaining the bit pattern of a float. + double DoubleImm = BitsToDouble(ImmOp64); + float TmpFloat = static_cast<float>(DoubleImm); + return FloatToBits(TmpFloat); +} + +bool MipsAsmParser::expandLoadSingleImmToGPR(MCInst &Inst, SMLoc IDLoc, + MCStreamer &Out, + const MCSubtargetInfo *STI) { + assert(Inst.getNumOperands() == 2 && "Invalid operand count"); + assert(Inst.getOperand(0).isReg() && Inst.getOperand(1).isImm() && + "Invalid instruction operand."); + + unsigned FirstReg = Inst.getOperand(0).getReg(); + uint64_t ImmOp64 = Inst.getOperand(1).getImm(); + + ImmOp64 = convertIntToDoubleImm(ImmOp64); + + uint32_t ImmOp32 = covertDoubleImmToSingleImm(ImmOp64); + + return loadImmediate(ImmOp32, FirstReg, Mips::NoRegister, true, true, IDLoc, + Out, STI); +} + +bool MipsAsmParser::expandLoadSingleImmToFPR(MCInst &Inst, SMLoc IDLoc, + MCStreamer &Out, + const MCSubtargetInfo *STI) { MipsTargetStreamer &TOut = getTargetStreamer(); assert(Inst.getNumOperands() == 2 && "Invalid operand count"); assert(Inst.getOperand(0).isReg() && Inst.getOperand(1).isImm() && @@ -3301,166 +3337,189 @@ bool MipsAsmParser::expandLoadImmReal(MCInst &Inst, bool IsSingle, bool IsGPR, unsigned FirstReg = Inst.getOperand(0).getReg(); uint64_t ImmOp64 = Inst.getOperand(1).getImm(); - uint32_t HiImmOp64 = (ImmOp64 & 0xffffffff00000000) >> 32; - // If ImmOp64 is AsmToken::Integer type (all bits set to zero in the - // exponent field), convert it to double (e.g. 1 to 1.0) - if ((HiImmOp64 & 0x7ff00000) == 0) { - APFloat RealVal(APFloat::IEEEdouble(), ImmOp64); - ImmOp64 = RealVal.bitcastToAPInt().getZExtValue(); + ImmOp64 = convertIntToDoubleImm(ImmOp64); + + uint32_t ImmOp32 = covertDoubleImmToSingleImm(ImmOp64); + + unsigned TmpReg = getATReg(IDLoc); + if (!TmpReg) + return true; + + if (Lo_32(ImmOp64) == 0) { + if (loadImmediate(ImmOp32, TmpReg, Mips::NoRegister, true, true, IDLoc, Out, + STI)) + return true; + TOut.emitRR(Mips::MTC1, FirstReg, TmpReg, IDLoc, STI); + return false; } - uint32_t LoImmOp64 = ImmOp64 & 0xffffffff; - HiImmOp64 = (ImmOp64 & 0xffffffff00000000) >> 32; + MCSection *CS = getStreamer().getCurrentSectionOnly(); + // FIXME: Enhance this expansion to use the .lit4 & .lit8 sections + // where appropriate. + MCSection *ReadOnlySection = + getContext().getELFSection(".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC); + + MCSymbol *Sym = getContext().createTempSymbol(); + const MCExpr *LoSym = + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); + const MipsMCExpr *LoExpr = + MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext()); + + getStreamer().SwitchSection(ReadOnlySection); + getStreamer().EmitLabel(Sym, IDLoc); + getStreamer().EmitIntValue(ImmOp32, 4); + getStreamer().SwitchSection(CS); - if (IsSingle) { - // Conversion of a double in an uint64_t to a float in a uint32_t, - // retaining the bit pattern of a float. - uint32_t ImmOp32; - double doubleImm = BitsToDouble(ImmOp64); - float tmp_float = static_cast<float>(doubleImm); - ImmOp32 = FloatToBits(tmp_float); + if (emitPartialAddress(TOut, IDLoc, Sym)) + return true; + TOut.emitRRX(Mips::LWC1, FirstReg, TmpReg, MCOperand::createExpr(LoExpr), + IDLoc, STI); + return false; +} + +bool MipsAsmParser::expandLoadDoubleImmToGPR(MCInst &Inst, SMLoc IDLoc, + MCStreamer &Out, + const MCSubtargetInfo *STI) { + MipsTargetStreamer &TOut = getTargetStreamer(); + assert(Inst.getNumOperands() == 2 && "Invalid operand count"); + assert(Inst.getOperand(0).isReg() && Inst.getOperand(1).isImm() && + "Invalid instruction operand."); + + unsigned FirstReg = Inst.getOperand(0).getReg(); + uint64_t ImmOp64 = Inst.getOperand(1).getImm(); + + ImmOp64 = convertIntToDoubleImm(ImmOp64); + + uint32_t LoImmOp64 = Lo_32(ImmOp64); + uint32_t HiImmOp64 = Hi_32(ImmOp64); + + unsigned TmpReg = getATReg(IDLoc); + if (!TmpReg) + return true; - if (IsGPR) { - if (loadImmediate(ImmOp32, FirstReg, Mips::NoRegister, true, true, IDLoc, + if (LoImmOp64 == 0) { + if (isABI_N32() || isABI_N64()) { + if (loadImmediate(ImmOp64, FirstReg, Mips::NoRegister, false, true, IDLoc, Out, STI)) return true; - return false; } else { - unsigned ATReg = getATReg(IDLoc); - if (!ATReg) + if (loadImmediate(HiImmOp64, FirstReg, Mips::NoRegister, true, true, + IDLoc, Out, STI)) return true; - if (LoImmOp64 == 0) { - if (loadImmediate(ImmOp32, ATReg, Mips::NoRegister, true, true, IDLoc, - Out, STI)) - return true; - TOut.emitRR(Mips::MTC1, FirstReg, ATReg, IDLoc, STI); - return false; - } - - MCSection *CS = getStreamer().getCurrentSectionOnly(); - // FIXME: Enhance this expansion to use the .lit4 & .lit8 sections - // where appropriate. - MCSection *ReadOnlySection = getContext().getELFSection( - ".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC); - - MCSymbol *Sym = getContext().createTempSymbol(); - const MCExpr *LoSym = - MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); - const MipsMCExpr *LoExpr = - MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext()); - - getStreamer().SwitchSection(ReadOnlySection); - getStreamer().EmitLabel(Sym, IDLoc); - getStreamer().EmitIntValue(ImmOp32, 4); - getStreamer().SwitchSection(CS); - if(emitPartialAddress(TOut, IDLoc, Sym)) + if (loadImmediate(0, nextReg(FirstReg), Mips::NoRegister, true, true, + IDLoc, Out, STI)) return true; - TOut.emitRRX(Mips::LWC1, FirstReg, ATReg, - MCOperand::createExpr(LoExpr), IDLoc, STI); } return false; } - // if(!IsSingle) - unsigned ATReg = getATReg(IDLoc); - if (!ATReg) + MCSection *CS = getStreamer().getCurrentSectionOnly(); + MCSection *ReadOnlySection = + getContext().getELFSection(".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC); + + MCSymbol *Sym = getContext().createTempSymbol(); + const MCExpr *LoSym = + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); + const MipsMCExpr *LoExpr = + MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext()); + + getStreamer().SwitchSection(ReadOnlySection); + getStreamer().EmitLabel(Sym, IDLoc); + getStreamer().EmitValueToAlignment(8); + getStreamer().EmitIntValue(ImmOp64, 8); + getStreamer().SwitchSection(CS); + + if (emitPartialAddress(TOut, IDLoc, Sym)) return true; - if (IsGPR) { - if (LoImmOp64 == 0) { - if(isABI_N32() || isABI_N64()) { - if (loadImmediate(HiImmOp64, FirstReg, Mips::NoRegister, false, true, - IDLoc, Out, STI)) - return true; - return false; - } else { - if (loadImmediate(HiImmOp64, FirstReg, Mips::NoRegister, true, true, - IDLoc, Out, STI)) - return true; + if (isABI_N64()) + TOut.emitRRX(Mips::DADDiu, TmpReg, TmpReg, MCOperand::createExpr(LoExpr), + IDLoc, STI); + else + TOut.emitRRX(Mips::ADDiu, TmpReg, TmpReg, MCOperand::createExpr(LoExpr), + IDLoc, STI); - if (loadImmediate(0, nextReg(FirstReg), Mips::NoRegister, true, true, - IDLoc, Out, STI)) - return true; - return false; - } - } + if (isABI_N32() || isABI_N64()) + TOut.emitRRI(Mips::LD, FirstReg, TmpReg, 0, IDLoc, STI); + else { + TOut.emitRRI(Mips::LW, FirstReg, TmpReg, 0, IDLoc, STI); + TOut.emitRRI(Mips::LW, nextReg(FirstReg), TmpReg, 4, IDLoc, STI); + } + return false; +} - MCSection *CS = getStreamer().getCurrentSectionOnly(); - MCSection *ReadOnlySection = getContext().getELFSection( - ".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC); +bool MipsAsmParser::expandLoadDoubleImmToFPR(MCInst &Inst, bool Is64FPU, + SMLoc IDLoc, MCStreamer &Out, + const MCSubtargetInfo *STI) { + MipsTargetStreamer &TOut = getTargetStreamer(); + assert(Inst.getNumOperands() == 2 && "Invalid operand count"); + assert(Inst.getOperand(0).isReg() && Inst.getOperand(1).isImm() && + "Invalid instruction operand."); - MCSymbol *Sym = getContext().createTempSymbol(); - const MCExpr *LoSym = - MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); - const MipsMCExpr *LoExpr = - MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext()); + unsigned FirstReg = Inst.getOperand(0).getReg(); + uint64_t ImmOp64 = Inst.getOperand(1).getImm(); - getStreamer().SwitchSection(ReadOnlySection); - getStreamer().EmitLabel(Sym, IDLoc); - getStreamer().EmitIntValue(HiImmOp64, 4); - getStreamer().EmitIntValue(LoImmOp64, 4); - getStreamer().SwitchSection(CS); + ImmOp64 = convertIntToDoubleImm(ImmOp64); - if(emitPartialAddress(TOut, IDLoc, Sym)) - return true; - if(isABI_N64()) - TOut.emitRRX(Mips::DADDiu, ATReg, ATReg, - MCOperand::createExpr(LoExpr), IDLoc, STI); - else - TOut.emitRRX(Mips::ADDiu, ATReg, ATReg, - MCOperand::createExpr(LoExpr), IDLoc, STI); + uint32_t LoImmOp64 = Lo_32(ImmOp64); + uint32_t HiImmOp64 = Hi_32(ImmOp64); - if(isABI_N32() || isABI_N64()) - TOut.emitRRI(Mips::LD, FirstReg, ATReg, 0, IDLoc, STI); - else { - TOut.emitRRI(Mips::LW, FirstReg, ATReg, 0, IDLoc, STI); - TOut.emitRRI(Mips::LW, nextReg(FirstReg), ATReg, 4, IDLoc, STI); - } - return false; - } else { // if(!IsGPR && !IsSingle) - if ((LoImmOp64 == 0) && - !((HiImmOp64 & 0xffff0000) && (HiImmOp64 & 0x0000ffff))) { - // FIXME: In the case where the constant is zero, we can load the - // register directly from the zero register. - if (loadImmediate(HiImmOp64, ATReg, Mips::NoRegister, true, true, IDLoc, + unsigned TmpReg = getATReg(IDLoc); + if (!TmpReg) + return true; + + if ((LoImmOp64 == 0) && + !((HiImmOp64 & 0xffff0000) && (HiImmOp64 & 0x0000ffff))) { + // FIXME: In the case where the constant is zero, we can load the + // register directly from the zero register. + + if (isABI_N32() || isABI_N64()) { + if (loadImmediate(ImmOp64, TmpReg, Mips::NoRegister, false, false, IDLoc, Out, STI)) return true; - if (isABI_N32() || isABI_N64()) - TOut.emitRR(Mips::DMTC1, FirstReg, ATReg, IDLoc, STI); - else if (hasMips32r2()) { - TOut.emitRR(Mips::MTC1, FirstReg, Mips::ZERO, IDLoc, STI); - TOut.emitRRR(Mips::MTHC1_D32, FirstReg, FirstReg, ATReg, IDLoc, STI); - } else { - TOut.emitRR(Mips::MTC1, nextReg(FirstReg), ATReg, IDLoc, STI); - TOut.emitRR(Mips::MTC1, FirstReg, Mips::ZERO, IDLoc, STI); - } + TOut.emitRR(Mips::DMTC1, FirstReg, TmpReg, IDLoc, STI); return false; } - MCSection *CS = getStreamer().getCurrentSectionOnly(); - // FIXME: Enhance this expansion to use the .lit4 & .lit8 sections - // where appropriate. - MCSection *ReadOnlySection = getContext().getELFSection( - ".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC); + if (loadImmediate(HiImmOp64, TmpReg, Mips::NoRegister, true, false, IDLoc, + Out, STI)) + return true; - MCSymbol *Sym = getContext().createTempSymbol(); - const MCExpr *LoSym = - MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); - const MipsMCExpr *LoExpr = - MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext()); + if (hasMips32r2()) { + TOut.emitRR(Mips::MTC1, FirstReg, Mips::ZERO, IDLoc, STI); + TOut.emitRRR(Mips::MTHC1_D32, FirstReg, FirstReg, TmpReg, IDLoc, STI); + } else { + TOut.emitRR(Mips::MTC1, nextReg(FirstReg), TmpReg, IDLoc, STI); + TOut.emitRR(Mips::MTC1, FirstReg, Mips::ZERO, IDLoc, STI); + } + return false; + } - getStreamer().SwitchSection(ReadOnlySection); - getStreamer().EmitLabel(Sym, IDLoc); - getStreamer().EmitIntValue(HiImmOp64, 4); - getStreamer().EmitIntValue(LoImmOp64, 4); - getStreamer().SwitchSection(CS); + MCSection *CS = getStreamer().getCurrentSectionOnly(); + // FIXME: Enhance this expansion to use the .lit4 & .lit8 sections + // where appropriate. + MCSection *ReadOnlySection = + getContext().getELFSection(".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC); + + MCSymbol *Sym = getContext().createTempSymbol(); + const MCExpr *LoSym = + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); + const MipsMCExpr *LoExpr = + MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext()); + + getStreamer().SwitchSection(ReadOnlySection); + getStreamer().EmitLabel(Sym, IDLoc); + getStreamer().EmitValueToAlignment(8); + getStreamer().EmitIntValue(ImmOp64, 8); + getStreamer().SwitchSection(CS); + + if (emitPartialAddress(TOut, IDLoc, Sym)) + return true; + + TOut.emitRRX(Is64FPU ? Mips::LDC164 : Mips::LDC1, FirstReg, TmpReg, + MCOperand::createExpr(LoExpr), IDLoc, STI); - if(emitPartialAddress(TOut, IDLoc, Sym)) - return true; - TOut.emitRRX(Is64FPU ? Mips::LDC164 : Mips::LDC1, FirstReg, ATReg, - MCOperand::createExpr(LoExpr), IDLoc, STI); - } return false; } diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td index 2a10322d3f49..a3efc9059268 100644 --- a/llvm/lib/Target/PowerPC/P9InstrResources.td +++ b/llvm/lib/Target/PowerPC/P9InstrResources.td @@ -128,14 +128,14 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C], (instregex "MTVSRW(A|Z)$"), (instregex "CMP(WI|LWI|W|LW)(8)?$"), (instregex "CMP(L)?D(I)?$"), - (instregex "SUBF(I)?C(8)?$"), + (instregex "SUBF(I)?C(8)?(O)?$"), (instregex "ANDI(S)?o(8)?$"), - (instregex "ADDC(8)?$"), + (instregex "ADDC(8)?(O)?$"), (instregex "ADDIC(8)?(o)?$"), - (instregex "ADD(8|4)(o)?$"), - (instregex "ADD(E|ME|ZE)(8)?(o)?$"), - (instregex "SUBF(E|ME|ZE)?(8)?(o)?$"), - (instregex "NEG(8)?(o)?$"), + (instregex "ADD(8|4)(O)?(o)?$"), + (instregex "ADD(E|ME|ZE)(8)?(O)?(o)?$"), + (instregex "SUBF(E|ME|ZE)?(8)?(O)?(o)?$"), + (instregex "NEG(8)?(O)?(o)?$"), (instregex "POPCNTB$"), (instregex "ADD(I|IS)?(8)?$"), (instregex "LI(S)?(8)?$"), @@ -147,7 +147,7 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C], (instregex "EQV(8)?(o)?$"), (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(o)?$"), (instregex "ADD(4|8)(TLS)?(_)?$"), - (instregex "NEG(8)?$"), + (instregex "NEG(8)?(O)?$"), (instregex "ADDI(S)?toc(HA|L)$"), COPY, MCRF, @@ -397,7 +397,7 @@ def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_3SLOTS_1C], (instrs (instregex "MADD(HD|HDU|LD|LD8)$"), - (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?$") + (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?(O)?$") )>; // 7 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three @@ -456,7 +456,7 @@ def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C, DISP_3SLOTS_1C, DISP_1C], (instrs - (instregex "MUL(H|L)(D|W)(U)?o$") + (instregex "MUL(H|L)(D|W)(U)?(O)?o$") )>; // 7 cycle Restricted DP operation and one 3 cycle ALU operation. @@ -944,7 +944,9 @@ def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C], def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C], (instrs DIVW, + DIVWO, DIVWU, + DIVWUO, MODSW )>; @@ -954,9 +956,13 @@ def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C], def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C], (instrs DIVWE, + DIVWEO, DIVD, + DIVDO, DIVWEU, + DIVWEUO, DIVDU, + DIVDUO, MODSD, MODUD, MODUW @@ -968,7 +974,9 @@ def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C], def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C], (instrs DIVDE, - DIVDEU + DIVDEO, + DIVDEU, + DIVDEUO )>; // Cracked DIV and ALU operation. Requires one full slice for the ALU operation @@ -987,9 +995,13 @@ def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C, DISP_EVEN_1C, DISP_1C], (instrs DIVDo, + DIVDOo, DIVDUo, + DIVDUOo, DIVWEo, - DIVWEUo + DIVWEOo, + DIVWEUo, + DIVWEUOo )>; // Cracked DIV and ALU operation. Requires one full slice for the ALU operation @@ -999,7 +1011,9 @@ def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C, DISP_EVEN_1C, DISP_1C], (instrs DIVDEo, - DIVDEUo + DIVDEOo, + DIVDEUo, + DIVDEUOo )>; // CR access instructions in _BrMCR, IIC_BrMCRX. @@ -1024,8 +1038,8 @@ def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C, def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C], (instrs - (instregex "ADDC(8)?o$"), - (instregex "SUBFC(8)?o$") + (instregex "ADDC(8)?(O)?o$"), + (instregex "SUBFC(8)?(O)?o$") )>; // Cracked ALU operations. diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index d598567f8e4e..0be98b420302 100644 --- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -497,9 +497,9 @@ def XORIS8 : DForm_4<27, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2), [(set i64:$dst, (xor i64:$src1, imm16ShiftedZExt:$src2))]>; let isCommutable = 1 in -defm ADD8 : XOForm_1r<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "add", "$rT, $rA, $rB", IIC_IntSimple, - [(set i64:$rT, (add i64:$rA, i64:$rB))]>; +defm ADD8 : XOForm_1rx<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "add", "$rT, $rA, $rB", IIC_IntSimple, + [(set i64:$rT, (add i64:$rA, i64:$rB))]>; // ADD8 has a special form: reg = ADD8(reg, sym@tls) for use by the // initial-exec thread-local storage model. We need to forbid r0 here - // while it works for add just fine, the linker can relax this to local-exec @@ -576,9 +576,9 @@ defm SUBFC8 : XOForm_1rc<31, 8, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), "subfc", "$rT, $rA, $rB", IIC_IntGeneral, [(set i64:$rT, (subc i64:$rB, i64:$rA))]>, PPC970_DGroup_Cracked; -defm SUBF8 : XOForm_1r<31, 40, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "subf", "$rT, $rA, $rB", IIC_IntGeneral, - [(set i64:$rT, (sub i64:$rB, i64:$rA))]>; +defm SUBF8 : XOForm_1rx<31, 40, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "subf", "$rT, $rA, $rB", IIC_IntGeneral, + [(set i64:$rT, (sub i64:$rB, i64:$rA))]>; defm NEG8 : XOForm_3r<31, 104, 0, (outs g8rc:$rT), (ins g8rc:$rA), "neg", "$rT, $rA", IIC_IntSimple, [(set i64:$rT, (ineg i64:$rA))]>; @@ -777,10 +777,10 @@ defm DIVD : XOForm_1rcr<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), defm DIVDU : XOForm_1rcr<31, 457, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), "divdu", "$rT, $rA, $rB", IIC_IntDivD, [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64; -def DIVDE : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "divde $rT, $rA, $rB", IIC_IntDivD, - [(set i64:$rT, (int_ppc_divde g8rc:$rA, g8rc:$rB))]>, - isPPC64, Requires<[HasExtDiv]>; +defm DIVDE : XOForm_1rcr<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "divde", "$rT, $rA, $rB", IIC_IntDivD, + [(set i64:$rT, (int_ppc_divde g8rc:$rA, g8rc:$rB))]>, + isPPC64, Requires<[HasExtDiv]>; let Predicates = [IsISA3_0] in { def MADDHD : VAForm_1a<48, (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC), @@ -815,24 +815,14 @@ def MODUD : XForm_8<31, 265, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), [(set i64:$rT, (urem i64:$rA, i64:$rB))]>; } -let Defs = [CR0] in -def DIVDEo : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "divde. $rT, $rA, $rB", IIC_IntDivD, - []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First, - isPPC64, Requires<[HasExtDiv]>; -def DIVDEU : XOForm_1<31, 393, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "divdeu $rT, $rA, $rB", IIC_IntDivD, - [(set i64:$rT, (int_ppc_divdeu g8rc:$rA, g8rc:$rB))]>, - isPPC64, Requires<[HasExtDiv]>; -let Defs = [CR0] in -def DIVDEUo : XOForm_1<31, 393, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "divdeu. $rT, $rA, $rB", IIC_IntDivD, - []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First, - isPPC64, Requires<[HasExtDiv]>; +defm DIVDEU : XOForm_1rcr<31, 393, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "divdeu", "$rT, $rA, $rB", IIC_IntDivD, + [(set i64:$rT, (int_ppc_divdeu g8rc:$rA, g8rc:$rB))]>, + isPPC64, Requires<[HasExtDiv]>; let isCommutable = 1 in -defm MULLD : XOForm_1r<31, 233, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "mulld", "$rT, $rA, $rB", IIC_IntMulHD, - [(set i64:$rT, (mul i64:$rA, i64:$rB))]>, isPPC64; +defm MULLD : XOForm_1rx<31, 233, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "mulld", "$rT, $rA, $rB", IIC_IntMulHD, + [(set i64:$rT, (mul i64:$rA, i64:$rB))]>, isPPC64; let Interpretation64Bit = 1, isCodeGenOnly = 1 in def MULLI8 : DForm_2<7, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm), "mulli $rD, $rA, $imm", IIC_IntMulLI, diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index a787bdd56b9d..b593a98e81a6 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -3529,8 +3529,10 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI, ForwardKilledOperandReg = MI.getOperand(ConstantOpNo).getReg(); unsigned Opc = MI.getOpcode(); - bool SpecialShift32 = - Opc == PPC::SLW || Opc == PPC::SLWo || Opc == PPC::SRW || Opc == PPC::SRWo; + bool SpecialShift32 = Opc == PPC::SLW || Opc == PPC::SLWo || + Opc == PPC::SRW || Opc == PPC::SRWo || + Opc == PPC::SLW8 || Opc == PPC::SLW8o || + Opc == PPC::SRW8 || Opc == PPC::SRW8o; bool SpecialShift64 = Opc == PPC::SLD || Opc == PPC::SLDo || Opc == PPC::SRD || Opc == PPC::SRDo; bool SetCR = Opc == PPC::SLWo || Opc == PPC::SRWo || diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index c313337047f0..d61e7fd90648 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -1023,6 +1023,32 @@ multiclass XOForm_1r<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, } } +// Multiclass for instructions which have a record overflow form as well +// as a record form but no carry (i.e. mulld, mulldo, subf, subfo, etc.) +multiclass XOForm_1rx<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : XOForm_1<opcode, xo, 0, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR0] in + def o : XOForm_1<opcode, xo, 0, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } + let BaseName = !strconcat(asmbase, "O") in { + let Defs = [XER] in + def O : XOForm_1<opcode, xo, 1, OOL, IOL, + !strconcat(asmbase, !strconcat("o ", asmstr)), itin, + []>, RecFormRel; + let Defs = [XER, CR0] in + def Oo : XOForm_1<opcode, xo, 1, OOL, IOL, + !strconcat(asmbase, !strconcat("o. ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + // Multiclass for instructions for which the non record form is not cracked // and the record form is cracked (i.e. divw, mullw, etc.) multiclass XOForm_1rcr<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, @@ -1038,6 +1064,16 @@ multiclass XOForm_1rcr<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, []>, isDOT, RecFormRel, PPC970_DGroup_First, PPC970_DGroup_Cracked; } + let BaseName = !strconcat(asmbase, "O") in { + let Defs = [XER] in + def O : XOForm_1<opcode, xo, 1, OOL, IOL, + !strconcat(asmbase, !strconcat("o ", asmstr)), itin, + []>, RecFormRel; + let Defs = [XER, CR0] in + def Oo : XOForm_1<opcode, xo, 1, OOL, IOL, + !strconcat(asmbase, !strconcat("o. ", asmstr)), itin, + []>, isDOT, RecFormRel; + } } multiclass XOForm_1rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, @@ -1053,6 +1089,16 @@ multiclass XOForm_1rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, !strconcat(asmbase, !strconcat(". ", asmstr)), itin, []>, isDOT, RecFormRel; } + let BaseName = !strconcat(asmbase, "O") in { + let Defs = [CARRY, XER] in + def O : XOForm_1<opcode, xo, 1, OOL, IOL, + !strconcat(asmbase, !strconcat("o ", asmstr)), itin, + []>, RecFormRel; + let Defs = [CARRY, XER, CR0] in + def Oo : XOForm_1<opcode, xo, 1, OOL, IOL, + !strconcat(asmbase, !strconcat("o. ", asmstr)), itin, + []>, isDOT, RecFormRel; + } } multiclass XOForm_3r<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, @@ -1067,6 +1113,16 @@ multiclass XOForm_3r<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, !strconcat(asmbase, !strconcat(". ", asmstr)), itin, []>, isDOT, RecFormRel; } + let BaseName = !strconcat(asmbase, "O") in { + let Defs = [XER] in + def O : XOForm_3<opcode, xo, 1, OOL, IOL, + !strconcat(asmbase, !strconcat("o ", asmstr)), itin, + []>, RecFormRel; + let Defs = [XER, CR0] in + def Oo : XOForm_3<opcode, xo, 1, OOL, IOL, + !strconcat(asmbase, !strconcat("o. ", asmstr)), itin, + []>, isDOT, RecFormRel; + } } multiclass XOForm_3rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, @@ -1082,6 +1138,16 @@ multiclass XOForm_3rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, !strconcat(asmbase, !strconcat(". ", asmstr)), itin, []>, isDOT, RecFormRel; } + let BaseName = !strconcat(asmbase, "O") in { + let Defs = [CARRY, XER] in + def O : XOForm_3<opcode, xo, 1, OOL, IOL, + !strconcat(asmbase, !strconcat("o ", asmstr)), itin, + []>, RecFormRel; + let Defs = [CARRY, XER, CR0] in + def Oo : XOForm_3<opcode, xo, 1, OOL, IOL, + !strconcat(asmbase, !strconcat("o. ", asmstr)), itin, + []>, isDOT, RecFormRel; + } } multiclass MForm_2r<bits<6> opcode, dag OOL, dag IOL, @@ -2776,9 +2842,9 @@ def MODUW : XForm_8<31, 267, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), let PPC970_Unit = 1, hasSideEffects = 0 in { // FXU Operations. // XO-Form instructions. Arithmetic instructions that can set overflow bit let isCommutable = 1 in -defm ADD4 : XOForm_1r<31, 266, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "add", "$rT, $rA, $rB", IIC_IntSimple, - [(set i32:$rT, (add i32:$rA, i32:$rB))]>; +defm ADD4 : XOForm_1rx<31, 266, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "add", "$rT, $rA, $rB", IIC_IntSimple, + [(set i32:$rT, (add i32:$rA, i32:$rB))]>; let isCodeGenOnly = 1 in def ADD4TLS : XOForm_1<31, 266, 0, (outs gprc:$rT), (ins gprc:$rA, tlsreg32:$rB), "add $rT, $rA, $rB", IIC_IntSimple, @@ -2795,24 +2861,14 @@ defm DIVW : XOForm_1rcr<31, 491, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), defm DIVWU : XOForm_1rcr<31, 459, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), "divwu", "$rT, $rA, $rB", IIC_IntDivW, [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>; -def DIVWE : XOForm_1<31, 427, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "divwe $rT, $rA, $rB", IIC_IntDivW, - [(set i32:$rT, (int_ppc_divwe gprc:$rA, gprc:$rB))]>, - Requires<[HasExtDiv]>; -let Defs = [CR0] in -def DIVWEo : XOForm_1<31, 427, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "divwe. $rT, $rA, $rB", IIC_IntDivW, - []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First, - Requires<[HasExtDiv]>; -def DIVWEU : XOForm_1<31, 395, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "divweu $rT, $rA, $rB", IIC_IntDivW, - [(set i32:$rT, (int_ppc_divweu gprc:$rA, gprc:$rB))]>, - Requires<[HasExtDiv]>; -let Defs = [CR0] in -def DIVWEUo : XOForm_1<31, 395, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "divweu. $rT, $rA, $rB", IIC_IntDivW, - []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First, - Requires<[HasExtDiv]>; +defm DIVWE : XOForm_1rcr<31, 427, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "divwe", "$rT, $rA, $rB", IIC_IntDivW, + [(set i32:$rT, (int_ppc_divwe gprc:$rA, gprc:$rB))]>, + Requires<[HasExtDiv]>; +defm DIVWEU : XOForm_1rcr<31, 395, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "divweu", "$rT, $rA, $rB", IIC_IntDivW, + [(set i32:$rT, (int_ppc_divweu gprc:$rA, gprc:$rB))]>, + Requires<[HasExtDiv]>; let isCommutable = 1 in { defm MULHW : XOForm_1r<31, 75, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), "mulhw", "$rT, $rA, $rB", IIC_IntMulHW, @@ -2820,13 +2876,13 @@ defm MULHW : XOForm_1r<31, 75, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), defm MULHWU : XOForm_1r<31, 11, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), "mulhwu", "$rT, $rA, $rB", IIC_IntMulHWU, [(set i32:$rT, (mulhu i32:$rA, i32:$rB))]>; -defm MULLW : XOForm_1r<31, 235, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "mullw", "$rT, $rA, $rB", IIC_IntMulHW, - [(set i32:$rT, (mul i32:$rA, i32:$rB))]>; +defm MULLW : XOForm_1rx<31, 235, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "mullw", "$rT, $rA, $rB", IIC_IntMulHW, + [(set i32:$rT, (mul i32:$rA, i32:$rB))]>; } // isCommutable -defm SUBF : XOForm_1r<31, 40, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "subf", "$rT, $rA, $rB", IIC_IntGeneral, - [(set i32:$rT, (sub i32:$rB, i32:$rA))]>; +defm SUBF : XOForm_1rx<31, 40, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "subf", "$rT, $rA, $rB", IIC_IntGeneral, + [(set i32:$rT, (sub i32:$rB, i32:$rA))]>; defm SUBFC : XOForm_1rc<31, 8, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), "subfc", "$rT, $rA, $rB", IIC_IntGeneral, [(set i32:$rT, (subc i32:$rB, i32:$rA))]>, diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index ff3dfbfaca05..9e9997df9ed1 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -331,8 +331,12 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, case Intrinsic::ceil: Opcode = ISD::FCEIL; break; case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; case Intrinsic::rint: Opcode = ISD::FRINT; break; + case Intrinsic::lrint: Opcode = ISD::LRINT; break; + case Intrinsic::llrint: Opcode = ISD::LLRINT; break; case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; case Intrinsic::round: Opcode = ISD::FROUND; break; + case Intrinsic::lround: Opcode = ISD::LROUND; break; + case Intrinsic::llround: Opcode = ISD::LLROUND; break; case Intrinsic::minnum: Opcode = ISD::FMINNUM; break; case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break; case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO; break; diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h index 4f339475508f..56a50fe6ddc0 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h @@ -52,6 +52,12 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo { bool trackLivenessAfterRegAlloc(const MachineFunction &) const override { return true; } + + const TargetRegisterClass * + getPointerRegClass(const MachineFunction &MF, + unsigned Kind = 0) const override { + return &RISCV::GPRRegClass; + } }; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td index 5c9b34f44734..104f5f7d2e68 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td @@ -121,10 +121,3 @@ def : Pat<(select (i32 (seteq I32:$cond, 0)), F32:$lhs, F32:$rhs), (SELECT_F32 F32:$rhs, F32:$lhs, I32:$cond)>; def : Pat<(select (i32 (seteq I32:$cond, 0)), F64:$lhs, F64:$rhs), (SELECT_F64 F64:$rhs, F64:$lhs, I32:$cond)>; - -// The legalizer inserts an unnecessary `and 1` to make input conform -// to getBooleanContents, which we can lower away. -def : Pat<(select (i32 (and I32:$cond, 1)), F32:$lhs, F32:$rhs), - (SELECT_F32 F32:$lhs, F32:$rhs, I32:$cond)>; -def : Pat<(select (i32 (and I32:$cond, 1)), F64:$lhs, F64:$rhs), - (SELECT_F64 F64:$lhs, F64:$rhs, I32:$cond)>; diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index e310fe069117..75cd92b3adc1 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -1396,9 +1396,13 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, int FI; if (unsigned Reg = TII.isStoreToStackSlot(FrameInstr, FI)) { if (X86::FR64RegClass.contains(Reg)) { + int Offset; unsigned IgnoredFrameReg; - int Offset = getFrameIndexReference(MF, FI, IgnoredFrameReg); - Offset += SEHFrameOffset; + if (IsWin64Prologue && IsFunclet) + Offset = getWin64EHFrameIndexRef(MF, FI, IgnoredFrameReg); + else + Offset = getFrameIndexReference(MF, FI, IgnoredFrameReg) + + SEHFrameOffset; HasWinCFI = true; assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data"); @@ -1554,9 +1558,13 @@ X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const { unsigned X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const { + const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); // This is the size of the pushed CSRs. - unsigned CSSize = - MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize(); + unsigned CSSize = X86FI->getCalleeSavedFrameSize(); + // This is the size of callee saved XMMs. + const auto& WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo(); + unsigned XMMSize = WinEHXMMSlotInfo.size() * + TRI->getSpillSize(X86::VR128RegClass); // This is the amount of stack a funclet needs to allocate. unsigned UsedSize; EHPersonality Personality = @@ -1576,7 +1584,7 @@ X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const { unsigned FrameSizeMinusRBP = alignTo(CSSize + UsedSize, getStackAlignment()); // Subtract out the size of the callee saved registers. This is how much stack // each funclet will allocate. - return FrameSizeMinusRBP - CSSize; + return FrameSizeMinusRBP + XMMSize - CSSize; } static bool isTailCallOpcode(unsigned Opc) { @@ -1850,6 +1858,20 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, return Offset + FPDelta; } +int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF, + int FI, unsigned &FrameReg) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); + const auto& WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo(); + const auto it = WinEHXMMSlotInfo.find(FI); + + if (it == WinEHXMMSlotInfo.end()) + return getFrameIndexReference(MF, FI, FrameReg); + + FrameReg = TRI->getStackRegister(); + return alignDown(MFI.getMaxCallFrameSize(), getStackAlignment()) + it->second; +} + int X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF, int FI, unsigned &FrameReg, int Adjustment) const { @@ -1948,6 +1970,8 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots( X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); unsigned CalleeSavedFrameSize = 0; + unsigned XMMCalleeSavedFrameSize = 0; + auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo(); int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta(); int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); @@ -2025,12 +2049,20 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots( unsigned Size = TRI->getSpillSize(*RC); unsigned Align = TRI->getSpillAlignment(*RC); // ensure alignment - SpillSlotOffset -= std::abs(SpillSlotOffset) % Align; + assert(SpillSlotOffset < 0 && "SpillSlotOffset should always < 0 on X86"); + SpillSlotOffset = -alignTo(-SpillSlotOffset, Align); + // spill into slot SpillSlotOffset -= Size; int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset); CSI[i - 1].setFrameIdx(SlotIndex); MFI.ensureMaxAlignment(Align); + + // Save the start offset and size of XMM in stack frame for funclets. + if (X86::VR128RegClass.contains(Reg)) { + WinEHXMMSlotInfo[SlotIndex] = XMMCalleeSavedFrameSize; + XMMCalleeSavedFrameSize += Size; + } } return true; diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h index d32746e3a36e..c5218cc09b8a 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.h +++ b/llvm/lib/Target/X86/X86FrameLowering.h @@ -99,6 +99,8 @@ public: int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const override; + int getWin64EHFrameIndexRef(const MachineFunction &MF, + int FI, unsigned &SPReg) const; int getFrameIndexReferenceSP(const MachineFunction &MF, int FI, unsigned &SPReg, int Adjustment) const; int getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 0c5b8a79dd62..920cdd7e625e 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -21182,12 +21182,14 @@ static SDValue splitVectorStore(StoreSDNode *Store, SelectionDAG &DAG) { "Expecting 256/512-bit op"); // Splitting volatile memory ops is not allowed unless the operation was not - // legal to begin with. We are assuming the input op is legal (this transform - // is only used for targets with AVX). + // legal to begin with. Assume the input store is legal (this transform is + // only used for targets with AVX). Note: It is possible that we have an + // illegal type like v2i128, and so we could allow splitting a volatile store + // in that case if that is important. if (Store->isVolatile()) return SDValue(); - MVT StoreVT = StoredVal.getSimpleValueType(); + EVT StoreVT = StoredVal.getValueType(); unsigned NumElems = StoreVT.getVectorNumElements(); unsigned HalfSize = StoredVal.getValueSizeInBits() / 2; unsigned HalfAlign = (128 == HalfSize ? 16 : 32); @@ -33651,14 +33653,14 @@ static SDValue foldShuffleOfHorizOp(SDNode *N, SelectionDAG &DAG) { // When the operands of a horizontal math op are identical, the low half of // the result is the same as the high half. If a target shuffle is also - // replicating low and high halves, we don't need the shuffle. + // replicating low and high halves (and without changing the type/length of + // the vector), we don't need the shuffle. if (Opcode == X86ISD::MOVDDUP || Opcode == X86ISD::VBROADCAST) { - if (HOp.getScalarValueSizeInBits() == 64) { + if (HOp.getScalarValueSizeInBits() == 64 && HOp.getValueType() == VT) { // movddup (hadd X, X) --> hadd X, X // broadcast (extract_vec_elt (hadd X, X), 0) --> hadd X, X assert((HOp.getValueType() == MVT::v2f64 || - HOp.getValueType() == MVT::v4f64) && HOp.getValueType() == VT && - "Unexpected type for h-op"); + HOp.getValueType() == MVT::v4f64) && "Unexpected type for h-op"); return updateHOp(HOp, DAG); } return SDValue(); diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index db36bcb929e3..d873edb857a8 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -422,7 +422,8 @@ namespace llvm { // Tests Types Of a FP Values for scalar types. VFPCLASSS, - // Broadcast scalar to vector. + // Broadcast (splat) scalar or element 0 of a vector. If the operand is + // a vector, this node may change the vector length as part of the splat. VBROADCAST, // Broadcast mask to vector. VBROADCASTM, diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index dbe45356c42b..e039e7125bce 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -1085,6 +1085,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, return nullptr; case X86::SUB32ri8: case X86::SUB32ri: { + if (!MI.getOperand(2).isImm()) + return nullptr; int64_t Imm = MI.getOperand(2).getImm(); if (!isInt<32>(-Imm)) return nullptr; @@ -1111,6 +1113,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::SUB64ri8: case X86::SUB64ri32: { + if (!MI.getOperand(2).isImm()) + return nullptr; int64_t Imm = MI.getOperand(2).getImm(); if (!isInt<32>(-Imm)) return nullptr; diff --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/llvm/lib/Target/X86/X86MachineFunctionInfo.h index d7e535598d81..5cb80a082b56 100644 --- a/llvm/lib/Target/X86/X86MachineFunctionInfo.h +++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.h @@ -36,6 +36,10 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { /// is stashed. signed char RestoreBasePointerOffset = 0; + /// WinEHXMMSlotInfo - Slot information of XMM registers in the stack frame + /// in bytes. + DenseMap<int, unsigned> WinEHXMMSlotInfo; + /// CalleeSavedFrameSize - Size of the callee-saved register portion of the /// stack frame in bytes. unsigned CalleeSavedFrameSize = 0; @@ -120,6 +124,10 @@ public: void setRestoreBasePointer(const MachineFunction *MF); int getRestoreBasePointerOffset() const {return RestoreBasePointerOffset; } + DenseMap<int, unsigned>& getWinEHXMMSlotInfo() { return WinEHXMMSlotInfo; } + const DenseMap<int, unsigned>& getWinEHXMMSlotInfo() const { + return WinEHXMMSlotInfo; } + unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; } void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; } diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp index 2e2f1f9e438a..c8966dfffa0c 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -692,12 +692,27 @@ static bool tryOptimizeLEAtoMOV(MachineBasicBlock::iterator II) { return true; } +static bool isFuncletReturnInstr(MachineInstr &MI) { + switch (MI.getOpcode()) { + case X86::CATCHRET: + case X86::CLEANUPRET: + return true; + default: + return false; + } + llvm_unreachable("impossible"); +} + void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const { MachineInstr &MI = *II; - MachineFunction &MF = *MI.getParent()->getParent(); + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); + bool IsEHFuncletEpilogue = MBBI == MBB.end() ? false + : isFuncletReturnInstr(*MBBI); const X86FrameLowering *TFI = getFrameLowering(MF); int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); @@ -709,6 +724,8 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MF.getFrameInfo().isFixedObjectIndex(FrameIndex)) && "Return instruction can only reference SP relative frame objects"); FIOffset = TFI->getFrameIndexReferenceSP(MF, FrameIndex, BasePtr, 0); + } else if (TFI->Is64Bit && (MBB.isEHFuncletEntry() || IsEHFuncletEpilogue)) { + FIOffset = TFI->getWin64EHFrameIndexRef(MF, FrameIndex, BasePtr); } else { FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, BasePtr); } |