Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp')
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 248
1 file changed, 169 insertions(+), 79 deletions(-)
diff --git a/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 025397b1eac0..dd515b0bf2f1 100644
--- a/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -40,7 +40,10 @@ GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
CurrCycleInstr(nullptr),
MF(MF),
ST(MF.getSubtarget<SISubtarget>()),
- TII(*ST.getInstrInfo()) {
+ TII(*ST.getInstrInfo()),
+ TRI(TII.getRegisterInfo()),
+ ClauseUses(TRI.getNumRegUnits()),
+ ClauseDefs(TRI.getNumRegUnits()) {
MaxLookAhead = 5;
}
@@ -84,6 +87,18 @@ static bool isSMovRel(unsigned Opcode) {
}
}
+static bool isSendMsgTraceDataOrGDS(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case AMDGPU::S_SENDMSG:
+ case AMDGPU::S_SENDMSGHALT:
+ case AMDGPU::S_TTRACEDATA:
+ return true;
+ default:
+ // TODO: GDS
+ return false;
+ }
+}
+
static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
AMDGPU::OpName::simm16);
@@ -97,7 +112,10 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0)
return NoopHazard;
- if (SIInstrInfo::isVMEM(*MI) && checkVMEMHazards(MI) > 0)
+ // FIXME: Should flat be considered vmem?
+ if ((SIInstrInfo::isVMEM(*MI) ||
+ SIInstrInfo::isFLAT(*MI))
+ && checkVMEMHazards(MI) > 0)
return NoopHazard;
if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
@@ -121,10 +139,18 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
return NoopHazard;
- if ((TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) &&
+ if (ST.hasReadM0MovRelInterpHazard() &&
+ (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) &&
checkReadM0Hazards(MI) > 0)
return NoopHazard;
+ if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI) &&
+ checkReadM0Hazards(MI) > 0)
+ return NoopHazard;
+
+ if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0)
+ return NoopHazard;
+
if (checkAnyInstHazards(MI) > 0)
return NoopHazard;
@@ -141,26 +167,23 @@ unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
if (SIInstrInfo::isSMRD(*MI))
return std::max(WaitStates, checkSMRDHazards(MI));
- if (SIInstrInfo::isVALU(*MI)) {
- WaitStates = std::max(WaitStates, checkVALUHazards(MI));
-
- if (SIInstrInfo::isVMEM(*MI))
- WaitStates = std::max(WaitStates, checkVMEMHazards(MI));
+ if (SIInstrInfo::isVALU(*MI))
+ WaitStates = std::max(WaitStates, checkVALUHazards(MI));
- if (SIInstrInfo::isDPP(*MI))
- WaitStates = std::max(WaitStates, checkDPPHazards(MI));
+ if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI))
+ WaitStates = std::max(WaitStates, checkVMEMHazards(MI));
- if (isDivFMas(MI->getOpcode()))
- WaitStates = std::max(WaitStates, checkDivFMasHazards(MI));
+ if (SIInstrInfo::isDPP(*MI))
+ WaitStates = std::max(WaitStates, checkDPPHazards(MI));
- if (isRWLane(MI->getOpcode()))
- WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));
+ if (isDivFMas(MI->getOpcode()))
+ WaitStates = std::max(WaitStates, checkDivFMasHazards(MI));
- if (TII.isVINTRP(*MI))
- WaitStates = std::max(WaitStates, checkReadM0Hazards(MI));
+ if (isRWLane(MI->getOpcode()))
+ WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));
- return WaitStates;
- }
+ if (MI->isInlineAsm())
+ return std::max(WaitStates, checkInlineAsmHazards(MI));
if (isSGetReg(MI->getOpcode()))
return std::max(WaitStates, checkGetRegHazards(MI));
@@ -171,7 +194,11 @@ unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
if (isRFE(MI->getOpcode()))
return std::max(WaitStates, checkRFEHazards(MI));
- if (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode()))
+ if (ST.hasReadM0MovRelInterpHazard() && (TII.isVINTRP(*MI) ||
+ isSMovRel(MI->getOpcode())))
+ return std::max(WaitStates, checkReadM0Hazards(MI));
+
+ if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI))
return std::max(WaitStates, checkReadM0Hazards(MI));
return WaitStates;
@@ -225,7 +252,8 @@ int GCNHazardRecognizer::getWaitStatesSince(
return WaitStates;
unsigned Opcode = MI->getOpcode();
- if (Opcode == AMDGPU::DBG_VALUE || Opcode == AMDGPU::IMPLICIT_DEF)
+ if (Opcode == AMDGPU::DBG_VALUE || Opcode == AMDGPU::IMPLICIT_DEF ||
+ Opcode == AMDGPU::INLINEASM)
continue;
}
++WaitStates;
@@ -257,19 +285,37 @@ int GCNHazardRecognizer::getWaitStatesSinceSetReg(
// No-op Hazard Detection
//===----------------------------------------------------------------------===//
-static void addRegsToSet(iterator_range<MachineInstr::const_mop_iterator> Ops,
- std::set<unsigned> &Set) {
+static void addRegUnits(const SIRegisterInfo &TRI,
+ BitVector &BV, unsigned Reg) {
+ for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI)
+ BV.set(*RUI);
+}
+
+static void addRegsToSet(const SIRegisterInfo &TRI,
+ iterator_range<MachineInstr::const_mop_iterator> Ops,
+ BitVector &Set) {
for (const MachineOperand &Op : Ops) {
if (Op.isReg())
- Set.insert(Op.getReg());
+ addRegUnits(TRI, Set, Op.getReg());
}
}
-int GCNHazardRecognizer::checkSMEMSoftClauseHazards(MachineInstr *SMEM) {
- // SMEM soft clause are only present on VI+
- if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
+void GCNHazardRecognizer::addClauseInst(const MachineInstr &MI) {
+ // XXX: Do we need to worry about implicit operands
+ addRegsToSet(TRI, MI.defs(), ClauseDefs);
+ addRegsToSet(TRI, MI.uses(), ClauseUses);
+}
+
+int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
+ // SMEM soft clauses are only present on VI+, and only matter if xnack is
+ // enabled.
+ if (!ST.isXNACKEnabled())
return 0;
+ bool IsSMRD = TII.isSMRD(*MEM);
+
+ resetClause();
+
// A soft-clause is any group of consecutive SMEM instructions. The
// instructions in this group may return out of order and/or may be
// replayed (i.e. the same instruction issued more than once).
@@ -280,51 +326,39 @@ int GCNHazardRecognizer::checkSMEMSoftClauseHazards(MachineInstr *SMEM) {
// (including itself). If we encounter this situation, we need to break the
// clause by inserting a non SMEM instruction.
- std::set<unsigned> ClauseDefs;
- std::set<unsigned> ClauseUses;
-
for (MachineInstr *MI : EmittedInstrs) {
-
// When we hit a non-SMEM instruction then we have passed the start of the
// clause and we can stop.
- if (!MI || !SIInstrInfo::isSMRD(*MI))
+ if (!MI)
break;
- addRegsToSet(MI->defs(), ClauseDefs);
- addRegsToSet(MI->uses(), ClauseUses);
+ if (IsSMRD != SIInstrInfo::isSMRD(*MI))
+ break;
+
+ addClauseInst(*MI);
}
- if (ClauseDefs.empty())
+ if (ClauseDefs.none())
return 0;
- // FIXME: When we support stores, we need to make sure not to put loads and
- // stores in the same clause if they use the same address. For now, just
- // start a new clause whenever we see a store.
- if (SMEM->mayStore())
+ // We need to make sure not to put loads and stores in the same clause if they
+ // use the same address. For now, just start a new clause whenever we see a
+ // store.
+ if (MEM->mayStore())
return 1;
- addRegsToSet(SMEM->defs(), ClauseDefs);
- addRegsToSet(SMEM->uses(), ClauseUses);
-
- std::vector<unsigned> Result(std::max(ClauseDefs.size(), ClauseUses.size()));
- std::vector<unsigned>::iterator End;
-
- End = std::set_intersection(ClauseDefs.begin(), ClauseDefs.end(),
- ClauseUses.begin(), ClauseUses.end(), Result.begin());
+ addClauseInst(*MEM);
// If the set of defs and uses intersect then we cannot add this instruction
// to the clause, so we have a hazard.
- if (End != Result.begin())
- return 1;
-
- return 0;
+ return ClauseDefs.anyCommon(ClauseUses) ? 1 : 0;
}
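
The rewritten clause tracking replaces the old std::set intersection with BitVectors indexed by register units, which also catches partial overlaps between aliasing registers (a wide SGPR tuple and one of its 32-bit halves share units). A minimal sketch of that overlap test, with invented unit numbers rather than real ones:

    #include "llvm/ADT/BitVector.h"

    // Illustrative only: unit numbers are invented; the real code sizes both
    // vectors with TRI.getNumRegUnits() and fills them via MCRegUnitIterator.
    static bool clauseNeedsBreak() {
      llvm::BitVector ClauseDefs(64), ClauseUses(64);
      ClauseDefs.set(7);  // a prior clause load defined a unit of its dest SGPRs
      ClauseUses.set(7);  // the candidate instruction reads that same unit
      // Any def/use overlap inside the clause means it has to be broken with a
      // non-memory instruction, reported as one required wait state above.
      return ClauseDefs.anyCommon(ClauseUses);  // true
    }
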
int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
int WaitStatesNeeded = 0;
- WaitStatesNeeded = checkSMEMSoftClauseHazards(SMRD);
+ WaitStatesNeeded = checkSoftClauseHazards(SMRD);
// This SMRD hazard only affects SI.
if (ST.getGeneration() != SISubtarget::SOUTHERN_ISLANDS)
@@ -334,6 +368,9 @@ int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
// SGPR was written by a VALU instruction.
int SmrdSgprWaitStates = 4;
auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
+ auto IsBufferHazardDefFn = [this] (MachineInstr *MI) { return TII.isSALU(*MI); };
+
+ bool IsBufferSMRD = TII.isBufferSMRD(*SMRD);
for (const MachineOperand &Use : SMRD->uses()) {
if (!Use.isReg())
@@ -341,23 +378,35 @@ int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
int WaitStatesNeededForUse =
SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn);
WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+
+ // This fixes what appears to be undocumented hardware behavior in SI where
+ // s_mov writing a descriptor and s_buffer_load_dword reading the descriptor
+ // needs some number of nops in between. We don't know how many we need, but
+ // let's use 4. This wasn't discovered before probably because the only
+ // case when this happens is when we expand a 64-bit pointer into a full
+ // descriptor and use s_buffer_load_dword instead of s_load_dword, which was
+ // probably never encountered in the closed-source land.
+ if (IsBufferSMRD) {
+ int WaitStatesNeededForUse =
+ SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
+ IsBufferHazardDefFn);
+ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+ }
}
+
return WaitStatesNeeded;
}
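
These SMRD checks, like most in this file, use the same budget arithmetic: a fixed wait-state budget for the hazard minus the wait states already observed since the offending def, folded into the running maximum. A self-contained toy example with hypothetical numbers (the budget of 4 is SmrdSgprWaitStates above; the 1 stands in for a getWaitStatesSinceDef() result):

    #include <algorithm>
    #include <cstdio>

    int main() {
      // Hypothetical: the descriptor SGPR was written by an s_mov_b32 one wait
      // state before the s_buffer_load_dword that reads it, and the budget is
      // 4, so three more wait states (noops) are still required before issue.
      const int SmrdSgprWaitStates = 4;  // budget used by checkSMRDHazards above
      int SinceDef = 1;                  // assumed getWaitStatesSinceDef() result
      int WaitStatesNeeded = std::max(0, SmrdSgprWaitStates - SinceDef);
      std::printf("noops still needed: %d\n", WaitStatesNeeded);  // prints 3
      return 0;
    }
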
int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
- const SIInstrInfo *TII = ST.getInstrInfo();
-
if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
return 0;
- const SIRegisterInfo &TRI = TII->getRegisterInfo();
+ int WaitStatesNeeded = checkSoftClauseHazards(VMEM);
// A read of an SGPR by a VMEM instruction requires 5 wait states when the
// SGPR was written by a VALU Instruction.
- int VmemSgprWaitStates = 5;
- int WaitStatesNeeded = 0;
- auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
+ const int VmemSgprWaitStates = 5;
+ auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
for (const MachineOperand &Use : VMEM->uses()) {
if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
@@ -372,10 +421,13 @@ int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) {
const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ const SIInstrInfo *TII = ST.getInstrInfo();
- // Check for DPP VGPR read after VALU VGPR write.
+ // Check for DPP VGPR read after VALU VGPR write and EXEC write.
int DppVgprWaitStates = 2;
+ int DppExecWaitStates = 5;
int WaitStatesNeeded = 0;
+ auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
for (const MachineOperand &Use : DPP->uses()) {
if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
@@ -385,6 +437,10 @@ int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) {
WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
}
+ WaitStatesNeeded = std::max(
+ WaitStatesNeeded,
+ DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC, IsHazardDefFn));
+
return WaitStatesNeeded;
}
@@ -475,39 +531,76 @@ int GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) {
return -1;
}
+int GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
+ const MachineRegisterInfo &MRI) {
+ // Helper to check for the hazard where VMEM instructions that store more than
+ // 8 bytes can have their store data overwritten by the next instruction.
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+
+ const int VALUWaitStates = 1;
+ int WaitStatesNeeded = 0;
+
+ if (!TRI->isVGPR(MRI, Def.getReg()))
+ return WaitStatesNeeded;
+ unsigned Reg = Def.getReg();
+ auto IsHazardFn = [this, Reg, TRI] (MachineInstr *MI) {
+ int DataIdx = createsVALUHazard(*MI);
+ return DataIdx >= 0 &&
+ TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg);
+ };
+ int WaitStatesNeededForDef =
+ VALUWaitStates - getWaitStatesSince(IsHazardFn);
+ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
+
+ return WaitStatesNeeded;
+}
+
int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) {
// This checks for the hazard where VMEM instructions that store more than
// 8 bytes can have their store data overwritten by the next instruction.
if (!ST.has12DWordStoreHazard())
return 0;
- const SIRegisterInfo *TRI = ST.getRegisterInfo();
- const MachineRegisterInfo &MRI = VALU->getParent()->getParent()->getRegInfo();
-
- const int VALUWaitStates = 1;
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
int WaitStatesNeeded = 0;
for (const MachineOperand &Def : VALU->defs()) {
- if (!TRI->isVGPR(MRI, Def.getReg()))
- continue;
- unsigned Reg = Def.getReg();
- auto IsHazardFn = [this, Reg, TRI] (MachineInstr *MI) {
- int DataIdx = createsVALUHazard(*MI);
- return DataIdx >= 0 &&
- TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg);
- };
- int WaitStatesNeededForDef =
- VALUWaitStates - getWaitStatesSince(IsHazardFn);
- WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
+ WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Def, MRI));
+ }
+
+ return WaitStatesNeeded;
+}
+
+int GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) {
+ // This checks for hazards associated with inline asm statements.
+ // Since inline asms can contain just about anything, we use this
+ // to call/leverage other check*Hazard routines. Note that
+ // this function doesn't attempt to address all possible inline asm
+ // hazards (good luck), but is a collection of what has been
+ // problematic thus far.
+
+ // see checkVALUHazards()
+ if (!ST.has12DWordStoreHazard())
+ return 0;
+
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ int WaitStatesNeeded = 0;
+
+ for (unsigned I = InlineAsm::MIOp_FirstOperand, E = IA->getNumOperands();
+ I != E; ++I) {
+ const MachineOperand &Op = IA->getOperand(I);
+ if (Op.isReg() && Op.isDef()) {
+ WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Op, MRI));
+ }
}
+
return WaitStatesNeeded;
}
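
checkInlineAsmHazards only reuses checkVALUHazardsHelper on the register defs of an INLINEASM, since an asm output can clobber the data of a preceding wide VMEM store just like a VALU def can. A hypothetical source-level illustration of the kind of statement being defended against (the asm body and constraint letters are assumptions for the example, not taken from this change):

    // Built for an amdgcn target, this inline asm defines a VGPR through the
    // "v" output constraint. If that VGPR overlaps the data registers of a
    // preceding buffer_store_dwordx3/x4, the def could overwrite the store
    // data, so its defs get the same wait-state check as a VALU instruction's.
    static float copy_through_vgpr(float x) {
      float y;
      __asm__ volatile("v_mov_b32 %0, %1" : "=v"(y) : "v"(x));
      return y;
    }
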
int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) {
const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
- const MachineRegisterInfo &MRI =
- RWLane->getParent()->getParent()->getRegInfo();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
const MachineOperand *LaneSelectOp =
TII->getNamedOperand(*RWLane, AMDGPU::OpName::src1);
@@ -568,11 +661,8 @@ int GCNHazardRecognizer::checkAnyInstHazards(MachineInstr *MI) {
}
int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
- if (!ST.hasReadM0Hazard())
- return 0;
-
const SIInstrInfo *TII = ST.getInstrInfo();
- int SMovRelWaitStates = 1;
+ const int SMovRelWaitStates = 1;
auto IsHazardFn = [TII] (MachineInstr *MI) {
return TII->isSALU(*MI);
};