diff options
Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 248 |
1 files changed, 169 insertions, 79 deletions
diff --git a/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 025397b1eac0..dd515b0bf2f1 100644 --- a/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -40,7 +40,10 @@ GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) : CurrCycleInstr(nullptr), MF(MF), ST(MF.getSubtarget<SISubtarget>()), - TII(*ST.getInstrInfo()) { + TII(*ST.getInstrInfo()), + TRI(TII.getRegisterInfo()), + ClauseUses(TRI.getNumRegUnits()), + ClauseDefs(TRI.getNumRegUnits()) { MaxLookAhead = 5; } @@ -84,6 +87,18 @@ static bool isSMovRel(unsigned Opcode) { } } +static bool isSendMsgTraceDataOrGDS(const MachineInstr &MI) { + switch (MI.getOpcode()) { + case AMDGPU::S_SENDMSG: + case AMDGPU::S_SENDMSGHALT: + case AMDGPU::S_TTRACEDATA: + return true; + default: + // TODO: GDS + return false; + } +} + static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) { const MachineOperand *RegOp = TII->getNamedOperand(RegInstr, AMDGPU::OpName::simm16); @@ -97,7 +112,10 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0) return NoopHazard; - if (SIInstrInfo::isVMEM(*MI) && checkVMEMHazards(MI) > 0) + // FIXME: Should flat be considered vmem? + if ((SIInstrInfo::isVMEM(*MI) || + SIInstrInfo::isFLAT(*MI)) + && checkVMEMHazards(MI) > 0) return NoopHazard; if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0) @@ -121,10 +139,18 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0) return NoopHazard; - if ((TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) && + if (ST.hasReadM0MovRelInterpHazard() && + (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) && checkReadM0Hazards(MI) > 0) return NoopHazard; + if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI) && + checkReadM0Hazards(MI) > 0) + return NoopHazard; + + if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0) + return NoopHazard; + if (checkAnyInstHazards(MI) > 0) return NoopHazard; @@ -141,26 +167,23 @@ unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) { if (SIInstrInfo::isSMRD(*MI)) return std::max(WaitStates, checkSMRDHazards(MI)); - if (SIInstrInfo::isVALU(*MI)) { - WaitStates = std::max(WaitStates, checkVALUHazards(MI)); - - if (SIInstrInfo::isVMEM(*MI)) - WaitStates = std::max(WaitStates, checkVMEMHazards(MI)); + if (SIInstrInfo::isVALU(*MI)) + WaitStates = std::max(WaitStates, checkVALUHazards(MI)); - if (SIInstrInfo::isDPP(*MI)) - WaitStates = std::max(WaitStates, checkDPPHazards(MI)); + if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI)) + WaitStates = std::max(WaitStates, checkVMEMHazards(MI)); - if (isDivFMas(MI->getOpcode())) - WaitStates = std::max(WaitStates, checkDivFMasHazards(MI)); + if (SIInstrInfo::isDPP(*MI)) + WaitStates = std::max(WaitStates, checkDPPHazards(MI)); - if (isRWLane(MI->getOpcode())) - WaitStates = std::max(WaitStates, checkRWLaneHazards(MI)); + if (isDivFMas(MI->getOpcode())) + WaitStates = std::max(WaitStates, checkDivFMasHazards(MI)); - if (TII.isVINTRP(*MI)) - WaitStates = std::max(WaitStates, checkReadM0Hazards(MI)); + if (isRWLane(MI->getOpcode())) + WaitStates = std::max(WaitStates, checkRWLaneHazards(MI)); - return WaitStates; - } + if (MI->isInlineAsm()) + return std::max(WaitStates, checkInlineAsmHazards(MI)); if (isSGetReg(MI->getOpcode())) return std::max(WaitStates, checkGetRegHazards(MI)); @@ -171,7 +194,11 @@ unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) { if (isRFE(MI->getOpcode())) return std::max(WaitStates, checkRFEHazards(MI)); - if (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) + if (ST.hasReadM0MovRelInterpHazard() && (TII.isVINTRP(*MI) || + isSMovRel(MI->getOpcode()))) + return std::max(WaitStates, checkReadM0Hazards(MI)); + + if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI)) return std::max(WaitStates, checkReadM0Hazards(MI)); return WaitStates; @@ -225,7 +252,8 @@ int GCNHazardRecognizer::getWaitStatesSince( return WaitStates; unsigned Opcode = MI->getOpcode(); - if (Opcode == AMDGPU::DBG_VALUE || Opcode == AMDGPU::IMPLICIT_DEF) + if (Opcode == AMDGPU::DBG_VALUE || Opcode == AMDGPU::IMPLICIT_DEF || + Opcode == AMDGPU::INLINEASM) continue; } ++WaitStates; @@ -257,19 +285,37 @@ int GCNHazardRecognizer::getWaitStatesSinceSetReg( // No-op Hazard Detection //===----------------------------------------------------------------------===// -static void addRegsToSet(iterator_range<MachineInstr::const_mop_iterator> Ops, - std::set<unsigned> &Set) { +static void addRegUnits(const SIRegisterInfo &TRI, + BitVector &BV, unsigned Reg) { + for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) + BV.set(*RUI); +} + +static void addRegsToSet(const SIRegisterInfo &TRI, + iterator_range<MachineInstr::const_mop_iterator> Ops, + BitVector &Set) { for (const MachineOperand &Op : Ops) { if (Op.isReg()) - Set.insert(Op.getReg()); + addRegUnits(TRI, Set, Op.getReg()); } } -int GCNHazardRecognizer::checkSMEMSoftClauseHazards(MachineInstr *SMEM) { - // SMEM soft clause are only present on VI+ - if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS) +void GCNHazardRecognizer::addClauseInst(const MachineInstr &MI) { + // XXX: Do we need to worry about implicit operands + addRegsToSet(TRI, MI.defs(), ClauseDefs); + addRegsToSet(TRI, MI.uses(), ClauseUses); +} + +int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) { + // SMEM soft clause are only present on VI+, and only matter if xnack is + // enabled. + if (!ST.isXNACKEnabled()) return 0; + bool IsSMRD = TII.isSMRD(*MEM); + + resetClause(); + // A soft-clause is any group of consecutive SMEM instructions. The // instructions in this group may return out of order and/or may be // replayed (i.e. the same instruction issued more than once). @@ -280,51 +326,39 @@ int GCNHazardRecognizer::checkSMEMSoftClauseHazards(MachineInstr *SMEM) { // (including itself). If we encounter this situaion, we need to break the // clause by inserting a non SMEM instruction. - std::set<unsigned> ClauseDefs; - std::set<unsigned> ClauseUses; - for (MachineInstr *MI : EmittedInstrs) { - // When we hit a non-SMEM instruction then we have passed the start of the // clause and we can stop. - if (!MI || !SIInstrInfo::isSMRD(*MI)) + if (!MI) break; - addRegsToSet(MI->defs(), ClauseDefs); - addRegsToSet(MI->uses(), ClauseUses); + if (IsSMRD != SIInstrInfo::isSMRD(*MI)) + break; + + addClauseInst(*MI); } - if (ClauseDefs.empty()) + if (ClauseDefs.none()) return 0; - // FIXME: When we support stores, we need to make sure not to put loads and - // stores in the same clause if they use the same address. For now, just - // start a new clause whenever we see a store. - if (SMEM->mayStore()) + // We need to make sure not to put loads and stores in the same clause if they + // use the same address. For now, just start a new clause whenever we see a + // store. + if (MEM->mayStore()) return 1; - addRegsToSet(SMEM->defs(), ClauseDefs); - addRegsToSet(SMEM->uses(), ClauseUses); - - std::vector<unsigned> Result(std::max(ClauseDefs.size(), ClauseUses.size())); - std::vector<unsigned>::iterator End; - - End = std::set_intersection(ClauseDefs.begin(), ClauseDefs.end(), - ClauseUses.begin(), ClauseUses.end(), Result.begin()); + addClauseInst(*MEM); // If the set of defs and uses intersect then we cannot add this instruction // to the clause, so we have a hazard. - if (End != Result.begin()) - return 1; - - return 0; + return ClauseDefs.anyCommon(ClauseUses) ? 1 : 0; } int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) { const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); int WaitStatesNeeded = 0; - WaitStatesNeeded = checkSMEMSoftClauseHazards(SMRD); + WaitStatesNeeded = checkSoftClauseHazards(SMRD); // This SMRD hazard only affects SI. if (ST.getGeneration() != SISubtarget::SOUTHERN_ISLANDS) @@ -334,6 +368,9 @@ int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) { // SGPR was written by a VALU instruction. int SmrdSgprWaitStates = 4; auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); }; + auto IsBufferHazardDefFn = [this] (MachineInstr *MI) { return TII.isSALU(*MI); }; + + bool IsBufferSMRD = TII.isBufferSMRD(*SMRD); for (const MachineOperand &Use : SMRD->uses()) { if (!Use.isReg()) @@ -341,23 +378,35 @@ int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) { int WaitStatesNeededForUse = SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn); WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); + + // This fixes what appears to be undocumented hardware behavior in SI where + // s_mov writing a descriptor and s_buffer_load_dword reading the descriptor + // needs some number of nops in between. We don't know how many we need, but + // let's use 4. This wasn't discovered before probably because the only + // case when this happens is when we expand a 64-bit pointer into a full + // descriptor and use s_buffer_load_dword instead of s_load_dword, which was + // probably never encountered in the closed-source land. + if (IsBufferSMRD) { + int WaitStatesNeededForUse = + SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), + IsBufferHazardDefFn); + WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); + } } + return WaitStatesNeeded; } int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) { - const SIInstrInfo *TII = ST.getInstrInfo(); - if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS) return 0; - const SIRegisterInfo &TRI = TII->getRegisterInfo(); + int WaitStatesNeeded = checkSoftClauseHazards(VMEM); // A read of an SGPR by a VMEM instruction requires 5 wait states when the // SGPR was written by a VALU Instruction. - int VmemSgprWaitStates = 5; - int WaitStatesNeeded = 0; - auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); }; + const int VmemSgprWaitStates = 5; + auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); }; for (const MachineOperand &Use : VMEM->uses()) { if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg())) @@ -372,10 +421,13 @@ int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) { int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) { const SIRegisterInfo *TRI = ST.getRegisterInfo(); + const SIInstrInfo *TII = ST.getInstrInfo(); - // Check for DPP VGPR read after VALU VGPR write. + // Check for DPP VGPR read after VALU VGPR write and EXEC write. int DppVgprWaitStates = 2; + int DppExecWaitStates = 5; int WaitStatesNeeded = 0; + auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); }; for (const MachineOperand &Use : DPP->uses()) { if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg())) @@ -385,6 +437,10 @@ int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) { WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); } + WaitStatesNeeded = std::max( + WaitStatesNeeded, + DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC, IsHazardDefFn)); + return WaitStatesNeeded; } @@ -475,39 +531,76 @@ int GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) { return -1; } +int GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def, + const MachineRegisterInfo &MRI) { + // Helper to check for the hazard where VMEM instructions that store more than + // 8 bytes can have there store data over written by the next instruction. + const SIRegisterInfo *TRI = ST.getRegisterInfo(); + + const int VALUWaitStates = 1; + int WaitStatesNeeded = 0; + + if (!TRI->isVGPR(MRI, Def.getReg())) + return WaitStatesNeeded; + unsigned Reg = Def.getReg(); + auto IsHazardFn = [this, Reg, TRI] (MachineInstr *MI) { + int DataIdx = createsVALUHazard(*MI); + return DataIdx >= 0 && + TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg); + }; + int WaitStatesNeededForDef = + VALUWaitStates - getWaitStatesSince(IsHazardFn); + WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef); + + return WaitStatesNeeded; +} + int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) { // This checks for the hazard where VMEM instructions that store more than // 8 bytes can have there store data over written by the next instruction. if (!ST.has12DWordStoreHazard()) return 0; - const SIRegisterInfo *TRI = ST.getRegisterInfo(); - const MachineRegisterInfo &MRI = VALU->getParent()->getParent()->getRegInfo(); - - const int VALUWaitStates = 1; + const MachineRegisterInfo &MRI = MF.getRegInfo(); int WaitStatesNeeded = 0; for (const MachineOperand &Def : VALU->defs()) { - if (!TRI->isVGPR(MRI, Def.getReg())) - continue; - unsigned Reg = Def.getReg(); - auto IsHazardFn = [this, Reg, TRI] (MachineInstr *MI) { - int DataIdx = createsVALUHazard(*MI); - return DataIdx >= 0 && - TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg); - }; - int WaitStatesNeededForDef = - VALUWaitStates - getWaitStatesSince(IsHazardFn); - WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef); + WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Def, MRI)); + } + + return WaitStatesNeeded; +} + +int GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) { + // This checks for hazards associated with inline asm statements. + // Since inline asms can contain just about anything, we use this + // to call/leverage other check*Hazard routines. Note that + // this function doesn't attempt to address all possible inline asm + // hazards (good luck), but is a collection of what has been + // problematic thus far. + + // see checkVALUHazards() + if (!ST.has12DWordStoreHazard()) + return 0; + + const MachineRegisterInfo &MRI = MF.getRegInfo(); + int WaitStatesNeeded = 0; + + for (unsigned I = InlineAsm::MIOp_FirstOperand, E = IA->getNumOperands(); + I != E; ++I) { + const MachineOperand &Op = IA->getOperand(I); + if (Op.isReg() && Op.isDef()) { + WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Op, MRI)); + } } + return WaitStatesNeeded; } int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) { const SIInstrInfo *TII = ST.getInstrInfo(); const SIRegisterInfo *TRI = ST.getRegisterInfo(); - const MachineRegisterInfo &MRI = - RWLane->getParent()->getParent()->getRegInfo(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); const MachineOperand *LaneSelectOp = TII->getNamedOperand(*RWLane, AMDGPU::OpName::src1); @@ -568,11 +661,8 @@ int GCNHazardRecognizer::checkAnyInstHazards(MachineInstr *MI) { } int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) { - if (!ST.hasReadM0Hazard()) - return 0; - const SIInstrInfo *TII = ST.getInstrInfo(); - int SMovRelWaitStates = 1; + const int SMovRelWaitStates = 1; auto IsHazardFn = [TII] (MachineInstr *MI) { return TII->isSALU(*MI); }; |