1 files changed, 169 insertions, 79 deletions
diff --git a/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 025397b1eac0..dd515b0bf2f1 100644
--- a/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -40,7 +40,10 @@ GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
   CurrCycleInstr(nullptr),
   MF(MF),
   ST(MF.getSubtarget<SISubtarget>()),
-  TII(*ST.getInstrInfo()) {
+  TII(*ST.getInstrInfo()),
+  TRI(TII.getRegisterInfo()),
+  ClauseUses(TRI.getNumRegUnits()),
+  ClauseDefs(TRI.getNumRegUnits()) {
   MaxLookAhead = 5;
 }
 
@@ -84,6 +87,18 @@ static bool isSMovRel(unsigned Opcode) {
   }
 }
 
+static bool isSendMsgTraceDataOrGDS(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  case AMDGPU::S_SENDMSG:
+  case AMDGPU::S_SENDMSGHALT:
+  case AMDGPU::S_TTRACEDATA:
+    return true;
+  default:
+    // TODO: GDS
+    return false;
+  }
+}
+
 static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
   const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
                                                      AMDGPU::OpName::simm16);
@@ -97,7 +112,10 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
   if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0)
     return NoopHazard;
 
-  if (SIInstrInfo::isVMEM(*MI) && checkVMEMHazards(MI) > 0)
+  // FIXME: Should flat be considered vmem?
+  if ((SIInstrInfo::isVMEM(*MI) ||
+       SIInstrInfo::isFLAT(*MI))
+      && checkVMEMHazards(MI) > 0)
     return NoopHazard;
 
   if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
@@ -121,10 +139,18 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
   if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
     return NoopHazard;
 
-  if ((TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) &&
+  if (ST.hasReadM0MovRelInterpHazard() &&
+      (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) &&
       checkReadM0Hazards(MI) > 0)
     return NoopHazard;
 
+  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI) &&
+      checkReadM0Hazards(MI) > 0)
+    return NoopHazard;
+
+  if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0)
+    return NoopHazard;
+
   if (checkAnyInstHazards(MI) > 0)
     return NoopHazard;
 
@@ -141,26 +167,23 @@ unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
   if (SIInstrInfo::isSMRD(*MI))
     return std::max(WaitStates, checkSMRDHazards(MI));
 
-  if (SIInstrInfo::isVALU(*MI)) {
-      WaitStates = std::max(WaitStates, checkVALUHazards(MI));
-
-    if (SIInstrInfo::isVMEM(*MI))
-      WaitStates = std::max(WaitStates, checkVMEMHazards(MI));
+  if (SIInstrInfo::isVALU(*MI))
+    WaitStates = std::max(WaitStates, checkVALUHazards(MI));
 
-    if (SIInstrInfo::isDPP(*MI))
-      WaitStates = std::max(WaitStates, checkDPPHazards(MI));
+  if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI))
+    WaitStates = std::max(WaitStates, checkVMEMHazards(MI));
 
-    if (isDivFMas(MI->getOpcode()))
-      WaitStates = std::max(WaitStates, checkDivFMasHazards(MI));
+  if (SIInstrInfo::isDPP(*MI))
+    WaitStates = std::max(WaitStates, checkDPPHazards(MI));
 
-    if (isRWLane(MI->getOpcode()))
-      WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));
+  if (isDivFMas(MI->getOpcode()))
+    WaitStates = std::max(WaitStates, checkDivFMasHazards(MI));
 
-    if (TII.isVINTRP(*MI))
-      WaitStates = std::max(WaitStates, checkReadM0Hazards(MI));
+  if (isRWLane(MI->getOpcode()))
+    WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));
 
-    return WaitStates;
-  }
+  if (MI->isInlineAsm())
+    return std::max(WaitStates, checkInlineAsmHazards(MI));
 
   if (isSGetReg(MI->getOpcode()))
     return std::max(WaitStates, checkGetRegHazards(MI));
@@ -171,7 +194,11 @@ unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
   if (isRFE(MI->getOpcode()))
     return std::max(WaitStates, checkRFEHazards(MI));
 
-  if (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode()))
+  if (ST.hasReadM0MovRelInterpHazard() && (TII.isVINTRP(*MI) ||
+                                           isSMovRel(MI->getOpcode())))
+    return std::max(WaitStates, checkReadM0Hazards(MI));
+
+  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI))
     return std::max(WaitStates, checkReadM0Hazards(MI));
 
   return WaitStates;
@@ -225,7 +252,8 @@ int GCNHazardRecognizer::getWaitStatesSince(
         return WaitStates;
 
       unsigned Opcode = MI->getOpcode();
-      if (Opcode == AMDGPU::DBG_VALUE || Opcode == AMDGPU::IMPLICIT_DEF)
+      if (Opcode == AMDGPU::DBG_VALUE || Opcode == AMDGPU::IMPLICIT_DEF ||
+          Opcode == AMDGPU::INLINEASM)
         continue;
     }
     ++WaitStates;
@@ -257,19 +285,37 @@ int GCNHazardRecognizer::getWaitStatesSinceSetReg(
 // No-op Hazard Detection
 //===----------------------------------------------------------------------===//
 
-static void addRegsToSet(iterator_range<MachineInstr::const_mop_iterator> Ops,
-                         std::set<unsigned> &Set) {
+static void addRegUnits(const SIRegisterInfo &TRI,
+                        BitVector &BV, unsigned Reg) {
+  for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI)
+    BV.set(*RUI);
+}
+
+static void addRegsToSet(const SIRegisterInfo &TRI,
+                         iterator_range<MachineInstr::const_mop_iterator> Ops,
+                         BitVector &Set) {
   for (const MachineOperand &Op : Ops) {
     if (Op.isReg())
-      Set.insert(Op.getReg());
+      addRegUnits(TRI, Set, Op.getReg());
   }
 }
 
-int GCNHazardRecognizer::checkSMEMSoftClauseHazards(MachineInstr *SMEM) {
-  // SMEM soft clause are only present on VI+
-  if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
+void GCNHazardRecognizer::addClauseInst(const MachineInstr &MI) {
+  // XXX: Do we need to worry about implicit operands
+  addRegsToSet(TRI, MI.defs(), ClauseDefs);
+  addRegsToSet(TRI, MI.uses(), ClauseUses);
+}
+
+int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
+  // SMEM soft clause are only present on VI+, and only matter if xnack is
+  // enabled.
+  if (!ST.isXNACKEnabled())
     return 0;
 
+  bool IsSMRD = TII.isSMRD(*MEM);
+
+  resetClause();
+
   // A soft-clause is any group of consecutive SMEM instructions.  The
   // instructions in this group may return out of order and/or may be
   // replayed (i.e. the same instruction issued more than once).
@@ -280,51 +326,39 @@ int GCNHazardRecognizer::checkSMEMSoftClauseHazards(MachineInstr *SMEM) {
   // (including itself). If we encounter this situaion, we need to break the
   // clause by inserting a non SMEM instruction.
 
-  std::set<unsigned> ClauseDefs;
-  std::set<unsigned> ClauseUses;
-
   for (MachineInstr *MI : EmittedInstrs) {
-
     // When we hit a non-SMEM instruction then we have passed the start of the
     // clause and we can stop.
-    if (!MI || !SIInstrInfo::isSMRD(*MI))
+    if (!MI)
       break;
 
-    addRegsToSet(MI->defs(), ClauseDefs);
-    addRegsToSet(MI->uses(), ClauseUses);
+    if (IsSMRD != SIInstrInfo::isSMRD(*MI))
+      break;
+
+    addClauseInst(*MI);
   }
 
-  if (ClauseDefs.empty())
+  if (ClauseDefs.none())
     return 0;
 
-  // FIXME: When we support stores, we need to make sure not to put loads and
-  // stores in the same clause if they use the same address.  For now, just
-  // start a new clause whenever we see a store.
-  if (SMEM->mayStore())
+  // We need to make sure not to put loads and stores in the same clause if they
+  // use the same address. For now, just start a new clause whenever we see a
+  // store.
+  if (MEM->mayStore())
     return 1;
 
-  addRegsToSet(SMEM->defs(), ClauseDefs);
-  addRegsToSet(SMEM->uses(), ClauseUses);
-
-  std::vector<unsigned> Result(std::max(ClauseDefs.size(), ClauseUses.size()));
-  std::vector<unsigned>::iterator End;
-
-  End = std::set_intersection(ClauseDefs.begin(), ClauseDefs.end(),
-                              ClauseUses.begin(), ClauseUses.end(), Result.begin());
+  addClauseInst(*MEM);
 
   // If the set of defs and uses intersect then we cannot add this instruction
   // to the clause, so we have a hazard.
-  if (End != Result.begin())
-    return 1;
-
-  return 0;
+  return ClauseDefs.anyCommon(ClauseUses) ? 1 : 0;
 }
 
 int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
   int WaitStatesNeeded = 0;
 
-  WaitStatesNeeded = checkSMEMSoftClauseHazards(SMRD);
+  WaitStatesNeeded = checkSoftClauseHazards(SMRD);
 
   // This SMRD hazard only affects SI.
   if (ST.getGeneration() != SISubtarget::SOUTHERN_ISLANDS)
@@ -334,6 +368,9 @@ int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
   // SGPR was written by a VALU instruction.
   int SmrdSgprWaitStates = 4;
   auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
+  auto IsBufferHazardDefFn = [this] (MachineInstr *MI) { return TII.isSALU(*MI); };
+
+  bool IsBufferSMRD = TII.isBufferSMRD(*SMRD);
 
   for (const MachineOperand &Use : SMRD->uses()) {
     if (!Use.isReg())
@@ -341,23 +378,35 @@ int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
     int WaitStatesNeededForUse =
         SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn);
     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+
+    // This fixes what appears to be undocumented hardware behavior in SI where
+    // s_mov writing a descriptor and s_buffer_load_dword reading the descriptor
+    // needs some number of nops in between. We don't know how many we need, but
+    // let's use 4. This wasn't discovered before probably because the only
+    // case when this happens is when we expand a 64-bit pointer into a full
+    // descriptor and use s_buffer_load_dword instead of s_load_dword, which was
+    // probably never encountered in the closed-source land.
+    if (IsBufferSMRD) {
+      int WaitStatesNeededForUse =
+        SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
+                                                   IsBufferHazardDefFn);
+      WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+    }
   }
+
   return WaitStatesNeeded;
 }
 
 int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
-  const SIInstrInfo *TII = ST.getInstrInfo();
-
   if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
     return 0;
 
-  const SIRegisterInfo &TRI = TII->getRegisterInfo();
+  int WaitStatesNeeded = checkSoftClauseHazards(VMEM);
 
   // A read of an SGPR by a VMEM instruction requires 5 wait states when the
   // SGPR was written by a VALU Instruction.
-  int VmemSgprWaitStates = 5;
-  int WaitStatesNeeded = 0;
-  auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
+  const int VmemSgprWaitStates = 5;
+  auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
 
   for (const MachineOperand &Use : VMEM->uses()) {
     if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
@@ -372,10 +421,13 @@ int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
 
 int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) {
   const SIRegisterInfo *TRI = ST.getRegisterInfo();
+  const SIInstrInfo *TII = ST.getInstrInfo();
 
-  // Check for DPP VGPR read after VALU VGPR write.
+  // Check for DPP VGPR read after VALU VGPR write and EXEC write.
   int DppVgprWaitStates = 2;
+  int DppExecWaitStates = 5;
   int WaitStatesNeeded = 0;
+  auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
 
   for (const MachineOperand &Use : DPP->uses()) {
     if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
@@ -385,6 +437,10 @@ int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) {
     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
   }
 
+  WaitStatesNeeded = std::max(
+      WaitStatesNeeded,
+      DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC, IsHazardDefFn));
+
   return WaitStatesNeeded;
 }
 
@@ -475,39 +531,76 @@ int GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) {
   return -1;
 }
 
+int GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
+						const MachineRegisterInfo &MRI) {
+  // Helper to check for the hazard where VMEM instructions that store more than
+  // 8 bytes can have there store data over written by the next instruction.
+  const SIRegisterInfo *TRI = ST.getRegisterInfo();
+
+  const int VALUWaitStates = 1;
+  int WaitStatesNeeded = 0;
+
+  if (!TRI->isVGPR(MRI, Def.getReg()))
+    return WaitStatesNeeded;
+  unsigned Reg = Def.getReg();
+  auto IsHazardFn = [this, Reg, TRI] (MachineInstr *MI) {
+    int DataIdx = createsVALUHazard(*MI);
+    return DataIdx >= 0 &&
+    TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg);
+  };
+  int WaitStatesNeededForDef =
+    VALUWaitStates - getWaitStatesSince(IsHazardFn);
+  WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
+
+  return WaitStatesNeeded;
+}
+
 int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) {
   // This checks for the hazard where VMEM instructions that store more than
   // 8 bytes can have there store data over written by the next instruction.
   if (!ST.has12DWordStoreHazard())
     return 0;
 
-  const SIRegisterInfo *TRI = ST.getRegisterInfo();
-  const MachineRegisterInfo &MRI = VALU->getParent()->getParent()->getRegInfo();
-
-  const int VALUWaitStates = 1;
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
   int WaitStatesNeeded = 0;
 
   for (const MachineOperand &Def : VALU->defs()) {
-    if (!TRI->isVGPR(MRI, Def.getReg()))
-      continue;
-    unsigned Reg = Def.getReg();
-    auto IsHazardFn = [this, Reg, TRI] (MachineInstr *MI) {
-      int DataIdx = createsVALUHazard(*MI);
-      return DataIdx >= 0 &&
-             TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg);
-    };
-    int WaitStatesNeededForDef =
-        VALUWaitStates - getWaitStatesSince(IsHazardFn);
-    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
+    WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Def, MRI));
+  }
+
+  return WaitStatesNeeded;
+}
+
+int GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) {
+  // This checks for hazards associated with inline asm statements.
+  // Since inline asms can contain just about anything, we use this
+  // to call/leverage other check*Hazard routines. Note that
+  // this function doesn't attempt to address all possible inline asm
+  // hazards (good luck), but is a collection of what has been
+  // problematic thus far.
+
+  // see checkVALUHazards()
+  if (!ST.has12DWordStoreHazard())
+    return 0;
+
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  int WaitStatesNeeded = 0;
+
+  for (unsigned I = InlineAsm::MIOp_FirstOperand, E = IA->getNumOperands();
+       I != E; ++I) {
+    const MachineOperand &Op = IA->getOperand(I);
+    if (Op.isReg() && Op.isDef()) {
+      WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Op, MRI));
+    }
   }
+
   return WaitStatesNeeded;
 }
 
 int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) {
   const SIInstrInfo *TII = ST.getInstrInfo();
   const SIRegisterInfo *TRI = ST.getRegisterInfo();
-  const MachineRegisterInfo &MRI =
-      RWLane->getParent()->getParent()->getRegInfo();
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
 
   const MachineOperand *LaneSelectOp =
       TII->getNamedOperand(*RWLane, AMDGPU::OpName::src1);
@@ -568,11 +661,8 @@ int GCNHazardRecognizer::checkAnyInstHazards(MachineInstr *MI) {
 }
 
 int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
-  if (!ST.hasReadM0Hazard())
-    return 0;
-
   const SIInstrInfo *TII = ST.getInstrInfo();
-  int SMovRelWaitStates = 1;
+  const int SMovRelWaitStates = 1;
   auto IsHazardFn = [TII] (MachineInstr *MI) {
     return TII->isSALU(*MI);
   };