Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp')
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp  213
1 file changed, 213 insertions(+), 0 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index fdee74d58d26..f255d098b631 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -3956,6 +3956,219 @@ AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
}};
}
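+
+// Build a REG_SEQUENCE from 2, 4 or 8 VGPR elements and return the resulting
+// 64/128/256-bit virtual register.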
+static Register buildRegSequence(SmallVectorImpl<Register> &Elts,
+ MachineInstr *InsertPt,
+ MachineRegisterInfo &MRI) {
+ const TargetRegisterClass *DstRegClass;
+ switch (Elts.size()) {
+ case 8:
+ DstRegClass = &AMDGPU::VReg_256RegClass;
+ break;
+ case 4:
+ DstRegClass = &AMDGPU::VReg_128RegClass;
+ break;
+ case 2:
+ DstRegClass = &AMDGPU::VReg_64RegClass;
+ break;
+ default:
+ llvm_unreachable("unhandled Reg sequence size");
+ }
+
+ MachineIRBuilder B(*InsertPt);
+ auto MIB = B.buildInstr(AMDGPU::REG_SEQUENCE)
+ .addDef(MRI.createVirtualRegister(DstRegClass));
+ for (unsigned i = 0; i < Elts.size(); ++i) {
+ MIB.addReg(Elts[i]);
+ MIB.addImm(SIRegisterInfo::getSubRegFromChannel(i));
+ }
+ return MIB->getOperand(0).getReg();
+}
+
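+// Fold the common modifier opcode (G_FNEG or G_FABS) of the original vector
+// elements into Mods: fabs sets NEG_HI, fneg sets NEG, and fneg of fabs on
+// every element sets both. Src is rebuilt as a REG_SEQUENCE of the
+// modifier-free elements.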
+static void selectWMMAModsNegAbs(unsigned ModOpcode, unsigned &Mods,
+ SmallVectorImpl<Register> &Elts, Register &Src,
+ MachineInstr *InsertPt,
+ MachineRegisterInfo &MRI) {
+ if (ModOpcode == TargetOpcode::G_FNEG) {
+ Mods |= SISrcMods::NEG;
+ // Check if all elements also have abs modifier
+ SmallVector<Register, 8> NegAbsElts;
+ for (auto El : Elts) {
+ Register FabsSrc;
+ if (!mi_match(El, MRI, m_GFabs(m_Reg(FabsSrc))))
+ break;
+ NegAbsElts.push_back(FabsSrc);
+ }
+ if (Elts.size() != NegAbsElts.size()) {
+ // Neg
+ Src = buildRegSequence(Elts, InsertPt, MRI);
+ } else {
+ // Neg and Abs
+ Mods |= SISrcMods::NEG_HI;
+ Src = buildRegSequence(NegAbsElts, InsertPt, MRI);
+ }
+ } else {
+ assert(ModOpcode == TargetOpcode::G_FABS);
+ // Abs
+ Mods |= SISrcMods::NEG_HI;
+ Src = buildRegSequence(Elts, InsertPt, MRI);
+ }
+}
+
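+// Match a G_BUILD_VECTOR whose f32 elements all carry the same G_FNEG or
+// G_FABS modifier and fold it into the source modifiers; otherwise return the
+// source unchanged with only the default modifiers set.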
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectWMMAModsF32NegAbs(MachineOperand &Root) const {
+ Register Src = Root.getReg();
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ unsigned ModOpcode;
+ SmallVector<Register, 8> EltsF32;
+
+ if (GBuildVector *BV = dyn_cast<GBuildVector>(MRI->getVRegDef(Src))) {
+ for (unsigned i = 0; i < BV->getNumSources(); ++i) {
+ MachineInstr *ElF32 = MRI->getVRegDef(BV->getSourceReg(i));
+      // Based on the first element, decide which modifier we match: neg or abs.
+ if (EltsF32.empty())
+ ModOpcode = (ElF32->getOpcode() == AMDGPU::G_FNEG) ? AMDGPU::G_FNEG
+ : AMDGPU::G_FABS;
+ if (ElF32->getOpcode() != ModOpcode)
+ break;
+ EltsF32.push_back(ElF32->getOperand(1).getReg());
+ }
+
+ // All elements had ModOpcode modifier
+ if (BV->getNumSources() == EltsF32.size()) {
+ selectWMMAModsNegAbs(ModOpcode, Mods, EltsF32, Src, Root.getParent(),
+ *MRI);
+ }
+ }
+
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
+}
+
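+// Match a G_CONCAT_VECTORS whose v2f16 pieces are all G_FNEG and fold the
+// negation into the source modifiers (NEG and NEG_HI).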
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectWMMAModsF16Neg(MachineOperand &Root) const {
+ Register Src = Root.getReg();
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ SmallVector<Register, 8> EltsV2F16;
+
+ if (GConcatVectors *CV = dyn_cast<GConcatVectors>(MRI->getVRegDef(Src))) {
+ for (unsigned i = 0; i < CV->getNumSources(); ++i) {
+ Register FNegSrc;
+ if (!mi_match(CV->getSourceReg(i), *MRI, m_GFNeg(m_Reg(FNegSrc))))
+ break;
+ EltsV2F16.push_back(FNegSrc);
+ }
+
+    // All elements had the fneg modifier
+ if (CV->getNumSources() == EltsV2F16.size()) {
+ Mods |= SISrcMods::NEG;
+ Mods |= SISrcMods::NEG_HI;
+ Src = buildRegSequence(EltsV2F16, Root.getParent(), *MRI);
+ }
+ }
+
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
+}
+
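+// Like selectWMMAModsF32NegAbs, but matches a G_CONCAT_VECTORS of v2f16
+// pieces that all carry the same G_FNEG or G_FABS modifier.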
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(MachineOperand &Root) const {
+ Register Src = Root.getReg();
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ unsigned ModOpcode;
+ SmallVector<Register, 8> EltsV2F16;
+
+ if (GConcatVectors *CV = dyn_cast<GConcatVectors>(MRI->getVRegDef(Src))) {
+ for (unsigned i = 0; i < CV->getNumSources(); ++i) {
+ MachineInstr *ElV2F16 = MRI->getVRegDef(CV->getSourceReg(i));
+      // Based on the first element, decide which modifier we match: neg or abs.
+ if (EltsV2F16.empty())
+ ModOpcode = (ElV2F16->getOpcode() == AMDGPU::G_FNEG) ? AMDGPU::G_FNEG
+ : AMDGPU::G_FABS;
+ if (ElV2F16->getOpcode() != ModOpcode)
+ break;
+ EltsV2F16.push_back(ElV2F16->getOperand(1).getReg());
+ }
+
+ // All elements had ModOpcode modifier
+ if (CV->getNumSources() == EltsV2F16.size()) {
+ selectWMMAModsNegAbs(ModOpcode, Mods, EltsV2F16, Src, Root.getParent(),
+ *MRI);
+ }
+ }
+
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
+}
+
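+// Select a constant (or splat of a constant) that is a legal inline immediate
+// for the target; otherwise report no match.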
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectWMMAVISrc(MachineOperand &Root) const {
+ std::optional<FPValueAndVReg> FPValReg;
+ if (mi_match(Root.getReg(), *MRI, m_GFCstOrSplat(FPValReg))) {
+ if (TII.isInlineConstant(FPValReg->Value.bitcastToAPInt())) {
+ return {{[=](MachineInstrBuilder &MIB) {
+ MIB.addImm(FPValReg->Value.bitcastToAPInt().getSExtValue());
+ }}};
+ }
+    // Non-inlineable splat floats should not fall through to the integer
+    // immediate checks.
+ return {};
+ }
+
+ APInt ICst;
+ if (mi_match(Root.getReg(), *MRI, m_ICstOrSplat(ICst))) {
+ if (TII.isInlineConstant(ICst)) {
+ return {
+ {[=](MachineInstrBuilder &MIB) { MIB.addImm(ICst.getSExtValue()); }}};
+ }
+ }
+
+ return {};
+}
+
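+// If the index is a 32-bit value logically shifted right by a multiple of 8,
+// select the unshifted value and encode the byte offset as index_key.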
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectSWMMACIndex8(MachineOperand &Root) const {
+ Register Src =
+ getDefIgnoringCopies(Root.getReg(), *MRI)->getOperand(0).getReg();
+ unsigned Key = 0;
+
+ Register ShiftSrc;
+ std::optional<ValueAndVReg> ShiftAmt;
+ if (mi_match(Src, *MRI, m_GLShr(m_Reg(ShiftSrc), m_GCst(ShiftAmt))) &&
+ MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
+ ShiftAmt->Value.getZExtValue() % 8 == 0) {
+ Key = ShiftAmt->Value.getZExtValue() / 8;
+ Src = ShiftSrc;
+ }
+
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Key); } // index_key
+ }};
+}
+
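+// If the index is a 32-bit value logically shifted right by 16, select the
+// unshifted value and set index_key to 1.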
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectSWMMACIndex16(MachineOperand &Root) const {
+ Register Src =
+ getDefIgnoringCopies(Root.getReg(), *MRI)->getOperand(0).getReg();
+ unsigned Key = 0;
+
+ Register ShiftSrc;
+ std::optional<ValueAndVReg> ShiftAmt;
+ if (mi_match(Src, *MRI, m_GLShr(m_Reg(ShiftSrc), m_GCst(ShiftAmt))) &&
+ MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
+ ShiftAmt->Value.getZExtValue() == 16) {
+ Src = ShiftSrc;
+ Key = 1;
+ }
+
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Key); } // index_key
+ }};
+}
+
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
Register Src;