Diffstat (limited to 'llvm/lib/CodeGen/GlobalISel')
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp   | 619
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp  |  36
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp |   2
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/Utils.cpp            |  27
4 files changed, 638 insertions, 46 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 3a52959d54bf..755b3b844570 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -27,6 +27,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/IR/DataLayout.h" #include "llvm/Support/Casting.h" @@ -3732,8 +3733,7 @@ void CombinerHelper::applyExtendThroughPhis(MachineInstr &MI, Builder.setInstrAndDebugLoc(MI); auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI); NewPhi.addDef(DstReg); - for (unsigned SrcIdx = 1; SrcIdx < MI.getNumOperands(); ++SrcIdx) { - auto &MO = MI.getOperand(SrcIdx); + for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) { if (!MO.isReg()) { NewPhi.addMBB(MO.getMBB()); continue; @@ -3825,8 +3825,7 @@ bool CombinerHelper::matchExtractAllEltsFromBuildVector( unsigned NumElts = DstTy.getNumElements(); SmallBitVector ExtractedElts(NumElts); - for (auto &II : make_range(MRI.use_instr_nodbg_begin(DstReg), - MRI.use_instr_nodbg_end())) { + for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) { if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT) return false; auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI); @@ -3868,6 +3867,51 @@ void CombinerHelper::applyBuildFnNoErase( MatchInfo(Builder); } +bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI, + BuildFnTy &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_OR); + + Register Dst = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(Dst); + unsigned BitWidth = Ty.getScalarSizeInBits(); + + Register ShlSrc, ShlAmt, LShrSrc, LShrAmt; + unsigned FshOpc = 0; + + // Match (or (shl x, amt), (lshr y, sub(bw, amt))). + if (mi_match( + Dst, MRI, + // m_GOr() handles the commuted version as well. + m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)), + m_GLShr(m_Reg(LShrSrc), m_GSub(m_SpecificICstOrSplat(BitWidth), + m_Reg(LShrAmt)))))) { + FshOpc = TargetOpcode::G_FSHL; + + // Match (or (shl x, sub(bw, amt)), (lshr y, amt)). + } else if (mi_match(Dst, MRI, + m_GOr(m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)), + m_GShl(m_Reg(ShlSrc), + m_GSub(m_SpecificICstOrSplat(BitWidth), + m_Reg(ShlAmt)))))) { + FshOpc = TargetOpcode::G_FSHR; + + } else { + return false; + } + + if (ShlAmt != LShrAmt) + return false; + + LLT AmtTy = MRI.getType(ShlAmt); + if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}})) + return false; + + MatchInfo = [=](MachineIRBuilder &B) { + B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, ShlAmt}); + }; + return true; +} + /// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate. bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) { unsigned Opc = MI.getOpcode(); @@ -4499,20 +4543,9 @@ bool CombinerHelper::matchNarrowBinopFeedingAnd( bool CombinerHelper::matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) { unsigned Opc = MI.getOpcode(); assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO); - // Check for a constant 2 or a splat of 2 on the RHS. - auto RHS = MI.getOperand(3).getReg(); - bool IsVector = MRI.getType(RHS).isVector(); - if (!IsVector && !mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(2))) - return false; - if (IsVector) { - // FIXME: There's no mi_match pattern for this yet. 
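matchOrShiftToFunnelShift above relies on the usual funnel-shift identity: for shift amounts strictly between 0 and the bit width, (or (shl x, amt), (lshr y, bw - amt)) computes exactly G_FSHL of x and y by amt, and the mirrored pattern computes G_FSHR. A minimal standalone sketch of that identity on a fixed 32-bit width (not part of the patch):

#include <cassert>
#include <cstdint>

// The pattern the combine matches, written out on a 32-bit scalar:
// (or (shl x, amt), (lshr y, sub(32, amt))).
static uint32_t OrOfShifts(uint32_t X, uint32_t Y, uint32_t Amt) {
  return (X << Amt) | (Y >> (32 - Amt));
}

// Reference semantics of G_FSHL: shift the 64-bit concatenation x:y left by
// Amt and keep the high 32 bits.
static uint32_t FshlRef(uint32_t X, uint32_t Y, uint32_t Amt) {
  uint64_t Concat = (uint64_t(X) << 32) | Y;
  return uint32_t((Concat << Amt) >> 32);
}

int main() {
  // Amt == 0 would shift Y by the full width (UB here, poison in MIR), so
  // only amounts strictly inside (0, 32) are checked.
  for (uint32_t Amt = 1; Amt < 32; ++Amt)
    assert(OrOfShifts(0x12345678u, 0x9abcdef0u, Amt) ==
           FshlRef(0x12345678u, 0x9abcdef0u, Amt));
  return 0;
}

When both shifted sources are the same register the funnel shift is a rotate, which is what the following matchFunnelShiftToRotate combine folds further.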
- auto *RHSDef = getDefIgnoringCopies(RHS, MRI); - if (!RHSDef) - return false; - auto Splat = getBuildVectorConstantSplat(*RHSDef, MRI); - if (!Splat || *Splat != 2) - return false; - } + + if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(2))) + return false; MatchInfo = [=, &MI](MachineIRBuilder &B) { Observer.changingInstr(MI); @@ -4760,6 +4793,556 @@ bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI, return true; } +/// Checks if \p MI is TargetOpcode::G_FMUL and contractable either +/// due to global flags or MachineInstr flags. +static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) { + if (MI.getOpcode() != TargetOpcode::G_FMUL) + return false; + return AllowFusionGlobally || MI.getFlag(MachineInstr::MIFlag::FmContract); +} + +static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1, + const MachineRegisterInfo &MRI) { + return std::distance(MRI.use_instr_nodbg_begin(MI0.getOperand(0).getReg()), + MRI.use_instr_nodbg_end()) > + std::distance(MRI.use_instr_nodbg_begin(MI1.getOperand(0).getReg()), + MRI.use_instr_nodbg_end()); +} + +bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI, + bool &AllowFusionGlobally, + bool &HasFMAD, bool &Aggressive, + bool CanReassociate) { + + auto *MF = MI.getMF(); + const auto &TLI = *MF->getSubtarget().getTargetLowering(); + const TargetOptions &Options = MF->getTarget().Options; + LLT DstType = MRI.getType(MI.getOperand(0).getReg()); + + if (CanReassociate && + !(Options.UnsafeFPMath || MI.getFlag(MachineInstr::MIFlag::FmReassoc))) + return false; + + // Floating-point multiply-add with intermediate rounding. + HasFMAD = (LI && TLI.isFMADLegal(MI, DstType)); + // Floating-point multiply-add without intermediate rounding. + bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) && + isLegalOrBeforeLegalizer({TargetOpcode::G_FMA, {DstType}}); + // No valid opcode, do not combine. + if (!HasFMAD && !HasFMA) + return false; + + AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast || + Options.UnsafeFPMath || HasFMAD; + // If the addition is not contractable, do not combine. + if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract)) + return false; + + Aggressive = TLI.enableAggressiveFMAFusion(DstType); + return true; +} + +bool CombinerHelper::matchCombineFAddFMulToFMadOrFMA( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_FADD); + + bool AllowFusionGlobally, HasFMAD, Aggressive; + if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive)) + return false; + + MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg()); + MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg()); + unsigned PreferredFusedOpcode = + HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA; + + // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)), + // prefer to fold the multiply with fewer uses. 
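canCombineFMadOrFMA above only allows fusing an fadd with an fmul when fast FP fusion is enabled globally (AllowFPOpFusion == Fast or UnsafeFPMath) or the instructions carry the contract flag, because fusing removes the intermediate rounding of the product and can change the result. A small standalone sketch of that difference (not part of the patch; plain C++ doubles stand in for the generic FP operations):

#include <cmath>
#include <cstdio>

int main() {
  double X = 1.0 + 0x1p-27, Y = 1.0 + 0x1p-27, Z = -(X * Y);
  double Unfused = X * Y + Z;       // product rounded before the add: exactly 0.0
  double Fused = std::fma(X, Y, Z); // exact product used: the rounding error, 0x1p-54
  std::printf("unfused = %g, fused = %a\n", Unfused, Fused);
  return 0;
}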
+ if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) && + isContractableFMul(*RHS, AllowFusionGlobally)) { + if (hasMoreUses(*LHS, *RHS, MRI)) + std::swap(LHS, RHS); + } + + // fold (fadd (fmul x, y), z) -> (fma x, y, z) + if (isContractableFMul(*LHS, AllowFusionGlobally) && + (Aggressive || MRI.hasOneNonDBGUse(LHS->getOperand(0).getReg()))) { + MatchInfo = [=, &MI](MachineIRBuilder &B) { + B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {LHS->getOperand(1).getReg(), LHS->getOperand(2).getReg(), + RHS->getOperand(0).getReg()}); + }; + return true; + } + + // fold (fadd x, (fmul y, z)) -> (fma y, z, x) + if (isContractableFMul(*RHS, AllowFusionGlobally) && + (Aggressive || MRI.hasOneNonDBGUse(RHS->getOperand(0).getReg()))) { + MatchInfo = [=, &MI](MachineIRBuilder &B) { + B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {RHS->getOperand(1).getReg(), RHS->getOperand(2).getReg(), + LHS->getOperand(0).getReg()}); + }; + return true; + } + + return false; +} + +bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMA( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_FADD); + + bool AllowFusionGlobally, HasFMAD, Aggressive; + if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive)) + return false; + + const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering(); + MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg()); + MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg()); + LLT DstType = MRI.getType(MI.getOperand(0).getReg()); + + unsigned PreferredFusedOpcode = + HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA; + + // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)), + // prefer to fold the multiply with fewer uses. + if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) && + isContractableFMul(*RHS, AllowFusionGlobally)) { + if (hasMoreUses(*LHS, *RHS, MRI)) + std::swap(LHS, RHS); + } + + // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) + MachineInstr *FpExtSrc; + if (mi_match(LHS->getOperand(0).getReg(), MRI, + m_GFPExt(m_MInstr(FpExtSrc))) && + isContractableFMul(*FpExtSrc, AllowFusionGlobally) && + TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType, + MRI.getType(FpExtSrc->getOperand(1).getReg()))) { + MatchInfo = [=, &MI](MachineIRBuilder &B) { + auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg()); + auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg()); + B.buildInstr( + PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {FpExtX.getReg(0), FpExtY.getReg(0), RHS->getOperand(0).getReg()}); + }; + return true; + } + + // fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z) + // Note: Commutes FADD operands. 
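The fpext variant above goes a step further: both multiplicands are extended and the product is formed inside the wide fma, so the narrow type's rounding of the multiply disappears entirely; that is why the fold is additionally gated on TLI.isFPExtFoldable. A standalone sketch of the value difference for an f32 -> f64 extension (illustrative only, not from the patch):

#include <cmath>
#include <cstdio>

int main() {
  float X = 1.0f + 0x1p-12f, Y = 1.0f + 0x1p-12f;
  double Z = 0.0;
  // Unfolded form: the f32 product is rounded, then extended to f64.
  double BeforeFold = double(X * Y) + Z;
  // Folded form: operands are extended first, so the product is formed (and
  // here represented exactly) in f64.
  double AfterFold = std::fma(double(X), double(Y), Z);
  std::printf("%.17g vs %.17g\n", BeforeFold, AfterFold);
  return 0;
}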
+ if (mi_match(RHS->getOperand(0).getReg(), MRI, + m_GFPExt(m_MInstr(FpExtSrc))) && + isContractableFMul(*FpExtSrc, AllowFusionGlobally) && + TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType, + MRI.getType(FpExtSrc->getOperand(1).getReg()))) { + MatchInfo = [=, &MI](MachineIRBuilder &B) { + auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg()); + auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg()); + B.buildInstr( + PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {FpExtX.getReg(0), FpExtY.getReg(0), LHS->getOperand(0).getReg()}); + }; + return true; + } + + return false; +} + +bool CombinerHelper::matchCombineFAddFMAFMulToFMadOrFMA( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_FADD); + + bool AllowFusionGlobally, HasFMAD, Aggressive; + if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true)) + return false; + + MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg()); + MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg()); + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + + unsigned PreferredFusedOpcode = + HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA; + + // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)), + // prefer to fold the multiply with fewer uses. + if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) && + isContractableFMul(*RHS, AllowFusionGlobally)) { + if (hasMoreUses(*LHS, *RHS, MRI)) + std::swap(LHS, RHS); + } + + MachineInstr *FMA = nullptr; + Register Z; + // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z)) + if (LHS->getOpcode() == PreferredFusedOpcode && + (MRI.getVRegDef(LHS->getOperand(3).getReg())->getOpcode() == + TargetOpcode::G_FMUL) && + MRI.hasOneNonDBGUse(LHS->getOperand(0).getReg()) && + MRI.hasOneNonDBGUse(LHS->getOperand(3).getReg())) { + FMA = LHS; + Z = RHS->getOperand(0).getReg(); + } + // fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z)) + else if (RHS->getOpcode() == PreferredFusedOpcode && + (MRI.getVRegDef(RHS->getOperand(3).getReg())->getOpcode() == + TargetOpcode::G_FMUL) && + MRI.hasOneNonDBGUse(RHS->getOperand(0).getReg()) && + MRI.hasOneNonDBGUse(RHS->getOperand(3).getReg())) { + Z = LHS->getOperand(0).getReg(); + FMA = RHS; + } + + if (FMA) { + MachineInstr *FMulMI = MRI.getVRegDef(FMA->getOperand(3).getReg()); + Register X = FMA->getOperand(1).getReg(); + Register Y = FMA->getOperand(2).getReg(); + Register U = FMulMI->getOperand(1).getReg(); + Register V = FMulMI->getOperand(2).getReg(); + + MatchInfo = [=, &MI](MachineIRBuilder &B) { + Register InnerFMA = MRI.createGenericVirtualRegister(DstTy); + B.buildInstr(PreferredFusedOpcode, {InnerFMA}, {U, V, Z}); + B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {X, Y, InnerFMA}); + }; + return true; + } + + return false; +} + +bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMAAggressive( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_FADD); + + bool AllowFusionGlobally, HasFMAD, Aggressive; + if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive)) + return false; + + if (!Aggressive) + return false; + + const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering(); + LLT DstType = MRI.getType(MI.getOperand(0).getReg()); + MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg()); + MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg()); + + 
unsigned PreferredFusedOpcode = + HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA; + + // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)), + // prefer to fold the multiply with fewer uses. + if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) && + isContractableFMul(*RHS, AllowFusionGlobally)) { + if (hasMoreUses(*LHS, *RHS, MRI)) + std::swap(LHS, RHS); + } + + // Builds: (fma x, y, (fma (fpext u), (fpext v), z)) + auto buildMatchInfo = [=, &MI](Register U, Register V, Register Z, Register X, + Register Y, MachineIRBuilder &B) { + Register FpExtU = B.buildFPExt(DstType, U).getReg(0); + Register FpExtV = B.buildFPExt(DstType, V).getReg(0); + Register InnerFMA = + B.buildInstr(PreferredFusedOpcode, {DstType}, {FpExtU, FpExtV, Z}) + .getReg(0); + B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {X, Y, InnerFMA}); + }; + + MachineInstr *FMulMI, *FMAMI; + // fold (fadd (fma x, y, (fpext (fmul u, v))), z) + // -> (fma x, y, (fma (fpext u), (fpext v), z)) + if (LHS->getOpcode() == PreferredFusedOpcode && + mi_match(LHS->getOperand(3).getReg(), MRI, m_GFPExt(m_MInstr(FMulMI))) && + isContractableFMul(*FMulMI, AllowFusionGlobally) && + TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType, + MRI.getType(FMulMI->getOperand(0).getReg()))) { + MatchInfo = [=](MachineIRBuilder &B) { + buildMatchInfo(FMulMI->getOperand(1).getReg(), + FMulMI->getOperand(2).getReg(), + RHS->getOperand(0).getReg(), LHS->getOperand(1).getReg(), + LHS->getOperand(2).getReg(), B); + }; + return true; + } + + // fold (fadd (fpext (fma x, y, (fmul u, v))), z) + // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z)) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. + if (mi_match(LHS->getOperand(0).getReg(), MRI, m_GFPExt(m_MInstr(FMAMI))) && + FMAMI->getOpcode() == PreferredFusedOpcode) { + MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg()); + if (isContractableFMul(*FMulMI, AllowFusionGlobally) && + TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType, + MRI.getType(FMAMI->getOperand(0).getReg()))) { + MatchInfo = [=](MachineIRBuilder &B) { + Register X = FMAMI->getOperand(1).getReg(); + Register Y = FMAMI->getOperand(2).getReg(); + X = B.buildFPExt(DstType, X).getReg(0); + Y = B.buildFPExt(DstType, Y).getReg(0); + buildMatchInfo(FMulMI->getOperand(1).getReg(), + FMulMI->getOperand(2).getReg(), + RHS->getOperand(0).getReg(), X, Y, B); + }; + + return true; + } + } + + // fold (fadd z, (fma x, y, (fpext (fmul u, v))) + // -> (fma x, y, (fma (fpext u), (fpext v), z)) + if (RHS->getOpcode() == PreferredFusedOpcode && + mi_match(RHS->getOperand(3).getReg(), MRI, m_GFPExt(m_MInstr(FMulMI))) && + isContractableFMul(*FMulMI, AllowFusionGlobally) && + TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType, + MRI.getType(FMulMI->getOperand(0).getReg()))) { + MatchInfo = [=](MachineIRBuilder &B) { + buildMatchInfo(FMulMI->getOperand(1).getReg(), + FMulMI->getOperand(2).getReg(), + LHS->getOperand(0).getReg(), RHS->getOperand(1).getReg(), + RHS->getOperand(2).getReg(), B); + }; + return true; + } + + // fold (fadd z, (fpext (fma x, y, (fmul u, v))) + // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z)) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. 
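matchCombineFAddFMAFMulToFMadOrFMA above moves z from the outer fadd into a new inner fma, which reassociates the two additions; that is why it calls canCombineFMadOrFMA with CanReassociate set and bails without UnsafeFPMath or the reassoc flag. A short standalone reminder (not part of the patch) that FP addition is not associative:

#include <cstdio>

int main() {
  // Reassociating (a + b) + c into a + (b + c) can change the result.
  double A = 1e16, B = -1e16, C = 1.0;
  std::printf("(A + B) + C = %g\n", (A + B) + C); // prints 1
  std::printf("A + (B + C) = %g\n", A + (B + C)); // prints 0
  return 0;
}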
+ if (mi_match(RHS->getOperand(0).getReg(), MRI, m_GFPExt(m_MInstr(FMAMI))) && + FMAMI->getOpcode() == PreferredFusedOpcode) { + MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg()); + if (isContractableFMul(*FMulMI, AllowFusionGlobally) && + TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType, + MRI.getType(FMAMI->getOperand(0).getReg()))) { + MatchInfo = [=](MachineIRBuilder &B) { + Register X = FMAMI->getOperand(1).getReg(); + Register Y = FMAMI->getOperand(2).getReg(); + X = B.buildFPExt(DstType, X).getReg(0); + Y = B.buildFPExt(DstType, Y).getReg(0); + buildMatchInfo(FMulMI->getOperand(1).getReg(), + FMulMI->getOperand(2).getReg(), + LHS->getOperand(0).getReg(), X, Y, B); + }; + return true; + } + } + + return false; +} + +bool CombinerHelper::matchCombineFSubFMulToFMadOrFMA( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_FSUB); + + bool AllowFusionGlobally, HasFMAD, Aggressive; + if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive)) + return false; + + MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg()); + MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg()); + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + + // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)), + // prefer to fold the multiply with fewer uses. + int FirstMulHasFewerUses = true; + if (isContractableFMul(*LHS, AllowFusionGlobally) && + isContractableFMul(*RHS, AllowFusionGlobally) && + hasMoreUses(*LHS, *RHS, MRI)) + FirstMulHasFewerUses = false; + + unsigned PreferredFusedOpcode = + HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA; + + // fold (fsub (fmul x, y), z) -> (fma x, y, -z) + if (FirstMulHasFewerUses && + (isContractableFMul(*LHS, AllowFusionGlobally) && + (Aggressive || MRI.hasOneNonDBGUse(LHS->getOperand(0).getReg())))) { + MatchInfo = [=, &MI](MachineIRBuilder &B) { + Register NegZ = B.buildFNeg(DstTy, RHS->getOperand(0).getReg()).getReg(0); + B.buildInstr( + PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {LHS->getOperand(1).getReg(), LHS->getOperand(2).getReg(), NegZ}); + }; + return true; + } + // fold (fsub x, (fmul y, z)) -> (fma -y, z, x) + else if ((isContractableFMul(*RHS, AllowFusionGlobally) && + (Aggressive || MRI.hasOneNonDBGUse(RHS->getOperand(0).getReg())))) { + MatchInfo = [=, &MI](MachineIRBuilder &B) { + Register NegY = B.buildFNeg(DstTy, RHS->getOperand(1).getReg()).getReg(0); + B.buildInstr( + PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {NegY, RHS->getOperand(2).getReg(), LHS->getOperand(0).getReg()}); + }; + return true; + } + + return false; +} + +bool CombinerHelper::matchCombineFSubFNegFMulToFMadOrFMA( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_FSUB); + + bool AllowFusionGlobally, HasFMAD, Aggressive; + if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive)) + return false; + + Register LHSReg = MI.getOperand(1).getReg(); + Register RHSReg = MI.getOperand(2).getReg(); + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + + unsigned PreferredFusedOpcode = + HasFMAD ? 
TargetOpcode::G_FMAD : TargetOpcode::G_FMA; + + MachineInstr *FMulMI; + // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z)) + if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) && + (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) && + MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) && + isContractableFMul(*FMulMI, AllowFusionGlobally)) { + MatchInfo = [=, &MI](MachineIRBuilder &B) { + Register NegX = + B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0); + Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0); + B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {NegX, FMulMI->getOperand(2).getReg(), NegZ}); + }; + return true; + } + + // fold (fsub x, (fneg (fmul, y, z))) -> (fma y, z, x) + if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) && + (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) && + MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) && + isContractableFMul(*FMulMI, AllowFusionGlobally)) { + MatchInfo = [=, &MI](MachineIRBuilder &B) { + B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {FMulMI->getOperand(1).getReg(), + FMulMI->getOperand(2).getReg(), LHSReg}); + }; + return true; + } + + return false; +} + +bool CombinerHelper::matchCombineFSubFpExtFMulToFMadOrFMA( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_FSUB); + + bool AllowFusionGlobally, HasFMAD, Aggressive; + if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive)) + return false; + + Register LHSReg = MI.getOperand(1).getReg(); + Register RHSReg = MI.getOperand(2).getReg(); + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + + unsigned PreferredFusedOpcode = + HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA; + + MachineInstr *FMulMI; + // fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z)) + if (mi_match(LHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) && + isContractableFMul(*FMulMI, AllowFusionGlobally) && + (Aggressive || MRI.hasOneNonDBGUse(LHSReg))) { + MatchInfo = [=, &MI](MachineIRBuilder &B) { + Register FpExtX = + B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0); + Register FpExtY = + B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0); + Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0); + B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {FpExtX, FpExtY, NegZ}); + }; + return true; + } + + // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x) + if (mi_match(RHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) && + isContractableFMul(*FMulMI, AllowFusionGlobally) && + (Aggressive || MRI.hasOneNonDBGUse(RHSReg))) { + MatchInfo = [=, &MI](MachineIRBuilder &B) { + Register FpExtY = + B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0); + Register NegY = B.buildFNeg(DstTy, FpExtY).getReg(0); + Register FpExtZ = + B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0); + B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {NegY, FpExtZ, LHSReg}); + }; + return true; + } + + return false; +} + +bool CombinerHelper::matchCombineFSubFpExtFNegFMulToFMadOrFMA( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_FSUB); + + bool AllowFusionGlobally, HasFMAD, Aggressive; + if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive)) + return false; + + const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering(); + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + Register LHSReg = 
MI.getOperand(1).getReg(); + Register RHSReg = MI.getOperand(2).getReg(); + + unsigned PreferredFusedOpcode = + HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA; + + auto buildMatchInfo = [=](Register Dst, Register X, Register Y, Register Z, + MachineIRBuilder &B) { + Register FpExtX = B.buildFPExt(DstTy, X).getReg(0); + Register FpExtY = B.buildFPExt(DstTy, Y).getReg(0); + B.buildInstr(PreferredFusedOpcode, {Dst}, {FpExtX, FpExtY, Z}); + }; + + MachineInstr *FMulMI; + // fold (fsub (fpext (fneg (fmul x, y))), z) -> + // (fneg (fma (fpext x), (fpext y), z)) + // fold (fsub (fneg (fpext (fmul x, y))), z) -> + // (fneg (fma (fpext x), (fpext y), z)) + if ((mi_match(LHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) || + mi_match(LHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) && + isContractableFMul(*FMulMI, AllowFusionGlobally) && + TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy, + MRI.getType(FMulMI->getOperand(0).getReg()))) { + MatchInfo = [=, &MI](MachineIRBuilder &B) { + Register FMAReg = MRI.createGenericVirtualRegister(DstTy); + buildMatchInfo(FMAReg, FMulMI->getOperand(1).getReg(), + FMulMI->getOperand(2).getReg(), RHSReg, B); + B.buildFNeg(MI.getOperand(0).getReg(), FMAReg); + }; + return true; + } + + // fold (fsub x, (fpext (fneg (fmul y, z)))) -> (fma (fpext y), (fpext z), x) + // fold (fsub x, (fneg (fpext (fmul y, z)))) -> (fma (fpext y), (fpext z), x) + if ((mi_match(RHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) || + mi_match(RHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) && + isContractableFMul(*FMulMI, AllowFusionGlobally) && + TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy, + MRI.getType(FMulMI->getOperand(0).getReg()))) { + MatchInfo = [=, &MI](MachineIRBuilder &B) { + buildMatchInfo(MI.getOperand(0).getReg(), FMulMI->getOperand(1).getReg(), + FMulMI->getOperand(2).getReg(), LHSReg, B); + }; + return true; + } + + return false; +} + bool CombinerHelper::tryCombine(MachineInstr &MI) { if (tryCombineCopy(MI)) return true; diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index c74bec7dfc0d..e09cd26eb0c1 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -585,8 +585,8 @@ simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, // FIXME: What does the original arg index mean here? SmallVector<CallLowering::ArgInfo, 3> Args; - for (unsigned i = 1; i < MI.getNumOperands(); i++) - Args.push_back({MI.getOperand(i).getReg(), OpType, 0}); + for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) + Args.push_back({MO.getReg(), OpType, 0}); return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType, 0}, Args); } @@ -1500,8 +1500,8 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, LLT WideDstTy = LLT::scalar(NumMerge * WideSize); // Decompose the original operands if they don't evenly divide. 
- for (int I = 1, E = MI.getNumOperands(); I != E; ++I) { - Register SrcReg = MI.getOperand(I).getReg(); + for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) { + Register SrcReg = MO.getReg(); if (GCD == SrcSize) { Unmerges.push_back(SrcReg); } else { @@ -4037,8 +4037,8 @@ LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, // Break into a common type SmallVector<Register, 16> Parts; - for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) - extractGCDType(Parts, GCDTy, MI.getOperand(I).getReg()); + for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) + extractGCDType(Parts, GCDTy, MO.getReg()); // Build the requested new merge, padding with undef. LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, @@ -7782,7 +7782,6 @@ LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src, // of that value loaded. This can result in a sequence of loads and stores // mixed types, depending on what the target specifies as good types to use. unsigned CurrOffset = 0; - LLT PtrTy = MRI.getType(Src); unsigned Size = KnownLen; for (auto CopyTy : MemOps) { // Issuing an unaligned load / store pair that overlaps with the previous @@ -7800,15 +7799,19 @@ LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src, Register LoadPtr = Src; Register Offset; if (CurrOffset != 0) { - Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset) + LLT SrcTy = MRI.getType(Src); + Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset) .getReg(0); - LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0); + LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0); } auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO); // Create the store. - Register StorePtr = - CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0); + Register StorePtr = Dst; + if (CurrOffset != 0) { + LLT DstTy = MRI.getType(Dst); + StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0); + } MIB.buildStore(LdVal, StorePtr, *StoreMMO); CurrOffset += CopyTy.getSizeInBytes(); Size -= CopyTy.getSizeInBytes(); @@ -7885,7 +7888,6 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src, // Apart from that, this loop is pretty much doing the same thing as the // memcpy codegen function. unsigned CurrOffset = 0; - LLT PtrTy = MRI.getType(Src); SmallVector<Register, 16> LoadVals; for (auto CopyTy : MemOps) { // Construct MMO for the load. @@ -7895,9 +7897,10 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src, // Create the load. 
Register LoadPtr = Src; if (CurrOffset != 0) { + LLT SrcTy = MRI.getType(Src); auto Offset = - MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset); - LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0); + MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset); + LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0); } LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0)); CurrOffset += CopyTy.getSizeInBytes(); @@ -7912,9 +7915,10 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src, Register StorePtr = Dst; if (CurrOffset != 0) { + LLT DstTy = MRI.getType(Dst); auto Offset = - MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset); - StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0); + MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset); + StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0); } MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO); CurrOffset += CopyTy.getSizeInBytes(); diff --git a/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp index 1a2102e3ef21..650500c7eb31 100644 --- a/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp +++ b/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp @@ -123,7 +123,7 @@ const RegisterBank *RegisterBankInfo::getRegBankFromConstraints( Register Reg = MI.getOperand(OpIdx).getReg(); const RegisterBank &RegBank = getRegBankFromRegClass(*RC, MRI.getType(Reg)); - // Sanity check that the target properly implemented getRegBankFromRegClass. + // Check that the target properly implemented getRegBankFromRegClass. assert(RegBank.covers(*RC) && "The mapping of the register bank does not make sense"); return &RegBank; diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 1a440c064a59..b0b84763e922 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -834,10 +834,9 @@ bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI, case TargetOpcode::G_BUILD_VECTOR: { // TODO: Probably should have a recursion depth guard since you could have // bitcasted vector elements. - for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) { - if (!isKnownToBeAPowerOfTwo(MI.getOperand(I).getReg(), MRI, KB)) + for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) + if (!isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB)) return false; - } return true; } @@ -845,8 +844,8 @@ bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI, // Only handle constants since we would need to know if number of leading // zeros is greater than the truncation amount. 
const unsigned BitWidth = Ty.getScalarSizeInBits(); - for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) { - auto Const = getIConstantVRegVal(MI.getOperand(I).getReg(), MRI); + for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) { + auto Const = getIConstantVRegVal(MO.getReg(), MRI); if (!Const || !Const->zextOrTrunc(BitWidth).isPowerOf2()) return false; } @@ -1031,16 +1030,22 @@ Optional<ValueAndVReg> getAnyConstantSplat(Register VReg, return SplatValAndReg; } -bool isBuildVectorConstantSplat(const MachineInstr &MI, - const MachineRegisterInfo &MRI, - int64_t SplatValue, bool AllowUndef) { - if (auto SplatValAndReg = - getAnyConstantSplat(MI.getOperand(0).getReg(), MRI, AllowUndef)) +} // end anonymous namespace + +bool llvm::isBuildVectorConstantSplat(const Register Reg, + const MachineRegisterInfo &MRI, + int64_t SplatValue, bool AllowUndef) { + if (auto SplatValAndReg = getAnyConstantSplat(Reg, MRI, AllowUndef)) return mi_match(SplatValAndReg->VReg, MRI, m_SpecificICst(SplatValue)); return false; } -} // end anonymous namespace +bool llvm::isBuildVectorConstantSplat(const MachineInstr &MI, + const MachineRegisterInfo &MRI, + int64_t SplatValue, bool AllowUndef) { + return isBuildVectorConstantSplat(MI.getOperand(0).getReg(), MRI, SplatValue, + AllowUndef); +} Optional<int64_t> llvm::getBuildVectorConstantSplat(const MachineInstr &MI, |
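Several hunks in this diff replace index-based loops over MI.getNumOperands() (starting at 1) with llvm::drop_begin(MI.operands()), which yields the operand range without its first element, i.e. everything but the def. A minimal standalone sketch of the helper on an ordinary container (assumes LLVM's ADT headers are on the include path; the values are made up):

#include "llvm/ADT/STLExtras.h"
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> Ops = {100, 1, 2, 3};  // pretend element 0 is the def
  for (int Use : llvm::drop_begin(Ops))   // drops the first element by default
    std::printf("%d ", Use);              // prints: 1 2 3
  std::printf("\n");
  return 0;
}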