author    Dimitry Andric <dim@FreeBSD.org>    2019-12-20 19:53:05 +0000
committer Dimitry Andric <dim@FreeBSD.org>    2019-12-20 19:53:05 +0000
commit    0b57cec536236d46e3dba9bd041533462f33dbb7 (patch)
tree      56229dbdbbf76d18580f72f789003db17246c8d9 /contrib/llvm-project/llvm/lib/MCA
parent    718ef55ec7785aae63f98f8ca05dc07ed399c16d (diff)
Move all sources from the llvm project into contrib/llvm-project.
This uses the new layout of the upstream repository, which was recently migrated to GitHub, and converted into a "monorepo". That is, most of the earlier separate sub-projects with their own branches and tags were consolidated into one top-level directory, and are now branched and tagged together. Updating the vendor area to match this layout is next.
Notes: svn path=/head/; revision=355940
Diffstat (limited to 'contrib/llvm-project/llvm/lib/MCA')
-rw-r--r--  contrib/llvm-project/llvm/lib/MCA/Context.cpp | 69
-rw-r--r--  contrib/llvm-project/llvm/lib/MCA/HWEventListener.cpp | 22
-rw-r--r--  contrib/llvm-project/llvm/lib/MCA/HardwareUnits/HardwareUnit.cpp | 24
-rw-r--r--  contrib/llvm-project/llvm/lib/MCA/HardwareUnits/LSUnit.cpp | 206
-rw-r--r--  contrib/llvm-project/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp | 500
-rw-r--r--  contrib/llvm-project/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp | 353
-rw-r--r--  contrib/llvm-project/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp | 87
-rw-r--r--  contrib/llvm-project/llvm/lib/MCA/HardwareUnits/Scheduler.cpp | 343
-rw-r--r--  contrib/llvm-project/llvm/lib/MCA/InstrBuilder.cpp | 717
-rw-r--r--  contrib/llvm-project/llvm/lib/MCA/Instruction.cpp | 254
-rw-r--r--  contrib/llvm-project/llvm/lib/MCA/Pipeline.cpp | 97
-rw-r--r--  contrib/llvm-project/llvm/lib/MCA/Stages/DispatchStage.cpp | 184
-rw-r--r--  contrib/llvm-project/llvm/lib/MCA/Stages/EntryStage.cpp | 77
-rw-r--r--  contrib/llvm-project/llvm/lib/MCA/Stages/ExecuteStage.cpp | 290
-rw-r--r--  contrib/llvm-project/llvm/lib/MCA/Stages/InstructionTables.cpp | 68
-rw-r--r--  contrib/llvm-project/llvm/lib/MCA/Stages/MicroOpQueueStage.cpp | 70
-rw-r--r--  contrib/llvm-project/llvm/lib/MCA/Stages/RetireStage.cpp | 61
-rw-r--r--  contrib/llvm-project/llvm/lib/MCA/Stages/Stage.cpp | 28
-rw-r--r--  contrib/llvm-project/llvm/lib/MCA/Support.cpp | 110
19 files changed, 3560 insertions, 0 deletions
diff --git a/contrib/llvm-project/llvm/lib/MCA/Context.cpp b/contrib/llvm-project/llvm/lib/MCA/Context.cpp
new file mode 100644
index 000000000000..f0e8dfab8680
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/MCA/Context.cpp
@@ -0,0 +1,69 @@
+//===---------------------------- Context.cpp -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines a class for holding ownership of various simulated
+/// hardware units. A Context also provides a utility routine for constructing
+/// a default out-of-order pipeline with fetch, dispatch, execute, and retire
+/// stages.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/Context.h"
+#include "llvm/MCA/HardwareUnits/RegisterFile.h"
+#include "llvm/MCA/HardwareUnits/RetireControlUnit.h"
+#include "llvm/MCA/HardwareUnits/Scheduler.h"
+#include "llvm/MCA/Stages/DispatchStage.h"
+#include "llvm/MCA/Stages/EntryStage.h"
+#include "llvm/MCA/Stages/ExecuteStage.h"
+#include "llvm/MCA/Stages/MicroOpQueueStage.h"
+#include "llvm/MCA/Stages/RetireStage.h"
+
+namespace llvm {
+namespace mca {
+
+std::unique_ptr<Pipeline>
+Context::createDefaultPipeline(const PipelineOptions &Opts, InstrBuilder &IB,
+ SourceMgr &SrcMgr) {
+ const MCSchedModel &SM = STI.getSchedModel();
+
+ // Create the hardware units defining the backend.
+ auto RCU = llvm::make_unique<RetireControlUnit>(SM);
+ auto PRF = llvm::make_unique<RegisterFile>(SM, MRI, Opts.RegisterFileSize);
+ auto LSU = llvm::make_unique<LSUnit>(SM, Opts.LoadQueueSize,
+ Opts.StoreQueueSize, Opts.AssumeNoAlias);
+ auto HWS = llvm::make_unique<Scheduler>(SM, *LSU);
+
+ // Create the pipeline stages.
+ auto Fetch = llvm::make_unique<EntryStage>(SrcMgr);
+ auto Dispatch = llvm::make_unique<DispatchStage>(STI, MRI, Opts.DispatchWidth,
+ *RCU, *PRF);
+ auto Execute =
+ llvm::make_unique<ExecuteStage>(*HWS, Opts.EnableBottleneckAnalysis);
+ auto Retire = llvm::make_unique<RetireStage>(*RCU, *PRF);
+
+ // Pass the ownership of all the hardware units to this Context.
+ addHardwareUnit(std::move(RCU));
+ addHardwareUnit(std::move(PRF));
+ addHardwareUnit(std::move(LSU));
+ addHardwareUnit(std::move(HWS));
+
+ // Build the pipeline.
+ auto StagePipeline = llvm::make_unique<Pipeline>();
+ StagePipeline->appendStage(std::move(Fetch));
+ if (Opts.MicroOpQueueSize)
+ StagePipeline->appendStage(llvm::make_unique<MicroOpQueueStage>(
+ Opts.MicroOpQueueSize, Opts.DecodersThroughput));
+ StagePipeline->appendStage(std::move(Dispatch));
+ StagePipeline->appendStage(std::move(Execute));
+ StagePipeline->appendStage(std::move(Retire));
+ return StagePipeline;
+}
+
+} // namespace mca
+} // namespace llvm
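In the file above, Context owns every simulated hardware unit (the unique_ptr instances handed to addHardwareUnit), while the stages only keep references to those units and the returned Pipeline owns the stages. The following standalone sketch illustrates that ownership pattern; SimContext, Scoreboard and Stage are made-up names for illustration, not part of the LLVM API.

#include <memory>
#include <vector>

struct Unit { virtual ~Unit() = default; };
struct Scoreboard : Unit { /* simulated hardware state */ };

struct Stage {
  explicit Stage(Scoreboard &SB) : SB(SB) {}
  Scoreboard &SB; // non-owning reference; valid while the context lives
};

class SimContext {
  std::vector<std::unique_ptr<Unit>> Units;   // owns every hardware unit
  std::vector<std::unique_ptr<Stage>> Stages; // owns every pipeline stage
public:
  Stage &addDefaultStage() {
    auto SB = std::make_unique<Scoreboard>();
    Stages.push_back(std::make_unique<Stage>(*SB)); // stage borrows the unit
    Units.push_back(std::move(SB));                 // context takes ownership
    return *Stages.back();
  }
};

int main() {
  SimContext Ctx;
  Ctx.addDefaultStage(); // units outlive the stages that reference them
}

The same lifetime constraint applies to the code above: the Context must stay alive for as long as the Pipeline it creates is run, because the stages reference units owned by the Context.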
diff --git a/contrib/llvm-project/llvm/lib/MCA/HWEventListener.cpp b/contrib/llvm-project/llvm/lib/MCA/HWEventListener.cpp
new file mode 100644
index 000000000000..58b2e0329222
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/MCA/HWEventListener.cpp
@@ -0,0 +1,22 @@
+//===----------------------- HWEventListener.cpp ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines a vtable anchor for class HWEventListener.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/HWEventListener.h"
+
+namespace llvm {
+namespace mca {
+
+// Anchor the vtable here.
+void HWEventListener::anchor() {}
+} // namespace mca
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/HardwareUnit.cpp b/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/HardwareUnit.cpp
new file mode 100644
index 000000000000..69f793796ec7
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/HardwareUnit.cpp
@@ -0,0 +1,24 @@
+//===------------------------- HardwareUnit.cpp -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the anchor for the base class that describes
+/// simulated hardware units.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/HardwareUnits/HardwareUnit.h"
+
+namespace llvm {
+namespace mca {
+
+// Pin the vtable with this method.
+HardwareUnit::~HardwareUnit() = default;
+
+} // namespace mca
+} // namespace llvm
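Both of the tiny files above exist for the same reason: defining one out-of-line virtual member (HWEventListener::anchor, or the HardwareUnit destructor) in a single .cpp file gives the class vtable a home, so the compiler does not emit a weak copy of it in every translation unit that includes the header. A minimal self-contained illustration of the idiom follows; Listener is a made-up name.

// One out-of-line virtual definition "anchors" the vtable: it is emitted
// once, in the object file that contains this definition, instead of
// weakly in every translation unit that includes the class definition.
struct Listener {
  virtual ~Listener() = default;
  virtual void anchor();        // declared, but defined out of line
  virtual void onEvent(int) {}  // everything else can stay inline
};

// Normally this lives alone in Listener.cpp, which then owns the vtable.
void Listener::anchor() {}

int main() { Listener L; L.onEvent(42); }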
diff --git a/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/LSUnit.cpp b/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/LSUnit.cpp
new file mode 100644
index 000000000000..ac1a6a36547b
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/LSUnit.cpp
@@ -0,0 +1,206 @@
+//===----------------------- LSUnit.cpp --------------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// A Load-Store Unit for the llvm-mca tool.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/HardwareUnits/LSUnit.h"
+#include "llvm/MCA/Instruction.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "llvm-mca"
+
+namespace llvm {
+namespace mca {
+
+LSUnitBase::LSUnitBase(const MCSchedModel &SM, unsigned LQ, unsigned SQ,
+ bool AssumeNoAlias)
+ : LQSize(LQ), SQSize(SQ), UsedLQEntries(0), UsedSQEntries(0),
+ NoAlias(AssumeNoAlias), NextGroupID(1) {
+ if (SM.hasExtraProcessorInfo()) {
+ const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo();
+ if (!LQSize && EPI.LoadQueueID) {
+ const MCProcResourceDesc &LdQDesc = *SM.getProcResource(EPI.LoadQueueID);
+ LQSize = LdQDesc.BufferSize;
+ }
+
+ if (!SQSize && EPI.StoreQueueID) {
+ const MCProcResourceDesc &StQDesc = *SM.getProcResource(EPI.StoreQueueID);
+ SQSize = StQDesc.BufferSize;
+ }
+ }
+}
+
+LSUnitBase::~LSUnitBase() {}
+
+void LSUnitBase::cycleEvent() {
+ for (const std::pair<unsigned, std::unique_ptr<MemoryGroup>> &G : Groups)
+ G.second->cycleEvent();
+}
+
+#ifndef NDEBUG
+void LSUnitBase::dump() const {
+ dbgs() << "[LSUnit] LQ_Size = " << getLoadQueueSize() << '\n';
+ dbgs() << "[LSUnit] SQ_Size = " << getStoreQueueSize() << '\n';
+ dbgs() << "[LSUnit] NextLQSlotIdx = " << getUsedLQEntries() << '\n';
+ dbgs() << "[LSUnit] NextSQSlotIdx = " << getUsedSQEntries() << '\n';
+ dbgs() << "\n";
+ for (const auto &GroupIt : Groups) {
+ const MemoryGroup &Group = *GroupIt.second;
+ dbgs() << "[LSUnit] Group (" << GroupIt.first << "): "
+ << "[ #Preds = " << Group.getNumPredecessors()
+ << ", #GIssued = " << Group.getNumExecutingPredecessors()
+ << ", #GExecuted = " << Group.getNumExecutedPredecessors()
+ << ", #Inst = " << Group.getNumInstructions()
+ << ", #IIssued = " << Group.getNumExecuting()
+ << ", #IExecuted = " << Group.getNumExecuted() << '\n';
+ }
+}
+#endif
+
+unsigned LSUnit::dispatch(const InstRef &IR) {
+ const InstrDesc &Desc = IR.getInstruction()->getDesc();
+ unsigned IsMemBarrier = Desc.HasSideEffects;
+ assert((Desc.MayLoad || Desc.MayStore) && "Not a memory operation!");
+
+ if (Desc.MayLoad)
+ assignLQSlot();
+ if (Desc.MayStore)
+ assignSQSlot();
+
+ if (Desc.MayStore) {
+ // Always create a new group for store operations.
+
+ // A store may not pass a previous store or store barrier.
+ unsigned NewGID = createMemoryGroup();
+ MemoryGroup &NewGroup = getGroup(NewGID);
+ NewGroup.addInstruction();
+
+ // A store may not pass a previous load or load barrier.
+ unsigned ImmediateLoadDominator =
+ std::max(CurrentLoadGroupID, CurrentLoadBarrierGroupID);
+ if (ImmediateLoadDominator) {
+ MemoryGroup &IDom = getGroup(ImmediateLoadDominator);
+ LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << ImmediateLoadDominator
+ << ") --> (" << NewGID << ")\n");
+ IDom.addSuccessor(&NewGroup);
+ }
+ if (CurrentStoreGroupID) {
+ MemoryGroup &StoreGroup = getGroup(CurrentStoreGroupID);
+ LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << CurrentStoreGroupID
+ << ") --> (" << NewGID << ")\n");
+ StoreGroup.addSuccessor(&NewGroup);
+ }
+
+ CurrentStoreGroupID = NewGID;
+ if (Desc.MayLoad) {
+ CurrentLoadGroupID = NewGID;
+ if (IsMemBarrier)
+ CurrentLoadBarrierGroupID = NewGID;
+ }
+
+ return NewGID;
+ }
+
+ assert(Desc.MayLoad && "Expected a load!");
+
+ // Always create a new memory group if this is the first load of the sequence.
+
+ // A load may not pass a previous store unless flag 'NoAlias' is set.
+ // A load may pass a previous load.
+  // A younger load cannot pass an older load barrier.
+  // A load barrier cannot pass an older load.
+ bool ShouldCreateANewGroup = !CurrentLoadGroupID || IsMemBarrier ||
+ CurrentLoadGroupID <= CurrentStoreGroupID ||
+ CurrentLoadGroupID <= CurrentLoadBarrierGroupID;
+ if (ShouldCreateANewGroup) {
+ unsigned NewGID = createMemoryGroup();
+ MemoryGroup &NewGroup = getGroup(NewGID);
+ NewGroup.addInstruction();
+
+ if (!assumeNoAlias() && CurrentStoreGroupID) {
+ MemoryGroup &StGroup = getGroup(CurrentStoreGroupID);
+ LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << CurrentStoreGroupID
+ << ") --> (" << NewGID << ")\n");
+ StGroup.addSuccessor(&NewGroup);
+ }
+ if (CurrentLoadBarrierGroupID) {
+ MemoryGroup &LdGroup = getGroup(CurrentLoadBarrierGroupID);
+ LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << CurrentLoadBarrierGroupID
+ << ") --> (" << NewGID << ")\n");
+ LdGroup.addSuccessor(&NewGroup);
+ }
+
+ CurrentLoadGroupID = NewGID;
+ if (IsMemBarrier)
+ CurrentLoadBarrierGroupID = NewGID;
+ return NewGID;
+ }
+
+ MemoryGroup &Group = getGroup(CurrentLoadGroupID);
+ Group.addInstruction();
+ return CurrentLoadGroupID;
+}
+
+LSUnit::Status LSUnit::isAvailable(const InstRef &IR) const {
+ const InstrDesc &Desc = IR.getInstruction()->getDesc();
+ if (Desc.MayLoad && isLQFull())
+ return LSUnit::LSU_LQUEUE_FULL;
+ if (Desc.MayStore && isSQFull())
+ return LSUnit::LSU_SQUEUE_FULL;
+ return LSUnit::LSU_AVAILABLE;
+}
+
+void LSUnitBase::onInstructionExecuted(const InstRef &IR) {
+ const InstrDesc &Desc = IR.getInstruction()->getDesc();
+ bool IsALoad = Desc.MayLoad;
+ bool IsAStore = Desc.MayStore;
+ assert((IsALoad || IsAStore) && "Expected a memory operation!");
+
+ unsigned GroupID = IR.getInstruction()->getLSUTokenID();
+ auto It = Groups.find(GroupID);
+ It->second->onInstructionExecuted();
+ if (It->second->isExecuted()) {
+ Groups.erase(It);
+ }
+
+ if (IsALoad) {
+ UsedLQEntries--;
+ LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << IR.getSourceIndex()
+ << " has been removed from the load queue.\n");
+ }
+
+ if (IsAStore) {
+ UsedSQEntries--;
+ LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << IR.getSourceIndex()
+ << " has been removed from the store queue.\n");
+ }
+}
+
+void LSUnit::onInstructionExecuted(const InstRef &IR) {
+ const Instruction &IS = *IR.getInstruction();
+ if (!IS.isMemOp())
+ return;
+
+ LSUnitBase::onInstructionExecuted(IR);
+ unsigned GroupID = IS.getLSUTokenID();
+ if (!isValidGroupID(GroupID)) {
+ if (GroupID == CurrentLoadGroupID)
+ CurrentLoadGroupID = 0;
+ if (GroupID == CurrentStoreGroupID)
+ CurrentStoreGroupID = 0;
+ if (GroupID == CurrentLoadBarrierGroupID)
+ CurrentLoadBarrierGroupID = 0;
+ }
+}
+
+} // namespace mca
+} // namespace llvm
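LSUnit::dispatch above tracks memory ordering with three running group IDs: a store always opens a new group and is made dependent on the newest load and store groups, while a load joins the current load group unless a barrier, or a newer store or barrier group, forces a fresh one. That bookkeeping can be distilled into the following standalone model; GroupTracker is an illustrative name, and dependency edges, queue occupancy and aliasing are deliberately left out.

#include <cstdio>

// Distilled model of the group-ID bookkeeping in LSUnit::dispatch above.
// It only answers "which group does this memory operation join?".
struct GroupTracker {
  unsigned NextGroupID = 1;
  unsigned CurrentLoadGroupID = 0;
  unsigned CurrentStoreGroupID = 0;
  unsigned CurrentLoadBarrierGroupID = 0;

  unsigned dispatch(bool MayLoad, bool MayStore, bool IsBarrier) {
    if (MayStore) {
      // A store always opens a new group; it may not pass earlier loads,
      // stores, or barriers.
      unsigned NewGID = NextGroupID++;
      CurrentStoreGroupID = NewGID;
      if (MayLoad) {
        CurrentLoadGroupID = NewGID;
        if (IsBarrier)
          CurrentLoadBarrierGroupID = NewGID;
      }
      return NewGID;
    }
    // Pure load: join the current load group unless a barrier, a newer
    // store group, or a newer barrier group forces a new one.
    bool NewGroup = !CurrentLoadGroupID || IsBarrier ||
                    CurrentLoadGroupID <= CurrentStoreGroupID ||
                    CurrentLoadGroupID <= CurrentLoadBarrierGroupID;
    if (NewGroup) {
      CurrentLoadGroupID = NextGroupID++;
      if (IsBarrier)
        CurrentLoadBarrierGroupID = CurrentLoadGroupID;
    }
    return CurrentLoadGroupID;
  }
};

int main() {
  GroupTracker T;
  std::printf("load  -> group %u\n", T.dispatch(true, false, false));  // 1
  std::printf("load  -> group %u\n", T.dispatch(true, false, false));  // 1 (joins)
  std::printf("store -> group %u\n", T.dispatch(false, true, false));  // 2 (new)
  std::printf("load  -> group %u\n", T.dispatch(true, false, false));  // 3 (after store)
}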
diff --git a/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp b/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp
new file mode 100644
index 000000000000..86a888ea8cae
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp
@@ -0,0 +1,500 @@
+//===--------------------- RegisterFile.cpp ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines a register mapping file class. This class is responsible
+/// for managing hardware register files and the tracking of data dependencies
+/// between registers.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/HardwareUnits/RegisterFile.h"
+#include "llvm/MCA/Instruction.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "llvm-mca"
+
+namespace llvm {
+namespace mca {
+
+RegisterFile::RegisterFile(const MCSchedModel &SM, const MCRegisterInfo &mri,
+ unsigned NumRegs)
+ : MRI(mri),
+ RegisterMappings(mri.getNumRegs(), {WriteRef(), RegisterRenamingInfo()}),
+ ZeroRegisters(mri.getNumRegs(), false) {
+ initialize(SM, NumRegs);
+}
+
+void RegisterFile::initialize(const MCSchedModel &SM, unsigned NumRegs) {
+ // Create a default register file that "sees" all the machine registers
+ // declared by the target. The number of physical registers in the default
+ // register file is set equal to `NumRegs`. A value of zero for `NumRegs`
+ // means: this register file has an unbounded number of physical registers.
+ RegisterFiles.emplace_back(NumRegs);
+ if (!SM.hasExtraProcessorInfo())
+ return;
+
+ // For each user defined register file, allocate a RegisterMappingTracker
+ // object. The size of every register file, as well as the mapping between
+ // register files and register classes is specified via tablegen.
+ const MCExtraProcessorInfo &Info = SM.getExtraProcessorInfo();
+
+ // Skip invalid register file at index 0.
+ for (unsigned I = 1, E = Info.NumRegisterFiles; I < E; ++I) {
+ const MCRegisterFileDesc &RF = Info.RegisterFiles[I];
+ assert(RF.NumPhysRegs && "Invalid PRF with zero physical registers!");
+
+ // The cost of a register definition is equivalent to the number of
+ // physical registers that are allocated at register renaming stage.
+ unsigned Length = RF.NumRegisterCostEntries;
+ const MCRegisterCostEntry *FirstElt =
+ &Info.RegisterCostTable[RF.RegisterCostEntryIdx];
+ addRegisterFile(RF, ArrayRef<MCRegisterCostEntry>(FirstElt, Length));
+ }
+}
+
+void RegisterFile::cycleStart() {
+ for (RegisterMappingTracker &RMT : RegisterFiles)
+ RMT.NumMoveEliminated = 0;
+}
+
+void RegisterFile::addRegisterFile(const MCRegisterFileDesc &RF,
+ ArrayRef<MCRegisterCostEntry> Entries) {
+ // A default register file is always allocated at index #0. That register file
+ // is mainly used to count the total number of mappings created by all
+ // register files at runtime. Users can limit the number of available physical
+ // registers in register file #0 through the command line flag
+ // `-register-file-size`.
+ unsigned RegisterFileIndex = RegisterFiles.size();
+ RegisterFiles.emplace_back(RF.NumPhysRegs, RF.MaxMovesEliminatedPerCycle,
+ RF.AllowZeroMoveEliminationOnly);
+
+ // Special case where there is no register class identifier in the set.
+ // An empty set of register classes means: this register file contains all
+ // the physical registers specified by the target.
+ // We optimistically assume that a register can be renamed at the cost of a
+ // single physical register. The constructor of RegisterFile ensures that
+ // a RegisterMapping exists for each logical register defined by the Target.
+ if (Entries.empty())
+ return;
+
+ // Now update the cost of individual registers.
+ for (const MCRegisterCostEntry &RCE : Entries) {
+ const MCRegisterClass &RC = MRI.getRegClass(RCE.RegisterClassID);
+ for (const MCPhysReg Reg : RC) {
+ RegisterRenamingInfo &Entry = RegisterMappings[Reg].second;
+ IndexPlusCostPairTy &IPC = Entry.IndexPlusCost;
+ if (IPC.first && IPC.first != RegisterFileIndex) {
+ // The only register file that is allowed to overlap is the default
+ // register file at index #0. The analysis is inaccurate if register
+ // files overlap.
+ errs() << "warning: register " << MRI.getName(Reg)
+ << " defined in multiple register files.";
+ }
+ IPC = std::make_pair(RegisterFileIndex, RCE.Cost);
+ Entry.RenameAs = Reg;
+ Entry.AllowMoveElimination = RCE.AllowMoveElimination;
+
+ // Assume the same cost for each sub-register.
+ for (MCSubRegIterator I(Reg, &MRI); I.isValid(); ++I) {
+ RegisterRenamingInfo &OtherEntry = RegisterMappings[*I].second;
+ if (!OtherEntry.IndexPlusCost.first &&
+ (!OtherEntry.RenameAs ||
+ MRI.isSuperRegister(*I, OtherEntry.RenameAs))) {
+ OtherEntry.IndexPlusCost = IPC;
+ OtherEntry.RenameAs = Reg;
+ }
+ }
+ }
+ }
+}
+
+void RegisterFile::allocatePhysRegs(const RegisterRenamingInfo &Entry,
+ MutableArrayRef<unsigned> UsedPhysRegs) {
+ unsigned RegisterFileIndex = Entry.IndexPlusCost.first;
+ unsigned Cost = Entry.IndexPlusCost.second;
+ if (RegisterFileIndex) {
+ RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex];
+ RMT.NumUsedPhysRegs += Cost;
+ UsedPhysRegs[RegisterFileIndex] += Cost;
+ }
+
+ // Now update the default register mapping tracker.
+ RegisterFiles[0].NumUsedPhysRegs += Cost;
+ UsedPhysRegs[0] += Cost;
+}
+
+void RegisterFile::freePhysRegs(const RegisterRenamingInfo &Entry,
+ MutableArrayRef<unsigned> FreedPhysRegs) {
+ unsigned RegisterFileIndex = Entry.IndexPlusCost.first;
+ unsigned Cost = Entry.IndexPlusCost.second;
+ if (RegisterFileIndex) {
+ RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex];
+ RMT.NumUsedPhysRegs -= Cost;
+ FreedPhysRegs[RegisterFileIndex] += Cost;
+ }
+
+ // Now update the default register mapping tracker.
+ RegisterFiles[0].NumUsedPhysRegs -= Cost;
+ FreedPhysRegs[0] += Cost;
+}
+
+void RegisterFile::addRegisterWrite(WriteRef Write,
+ MutableArrayRef<unsigned> UsedPhysRegs) {
+ WriteState &WS = *Write.getWriteState();
+ unsigned RegID = WS.getRegisterID();
+ assert(RegID && "Adding an invalid register definition?");
+
+ LLVM_DEBUG({
+ dbgs() << "RegisterFile: addRegisterWrite [ " << Write.getSourceIndex()
+ << ", " << MRI.getName(RegID) << "]\n";
+ });
+
+ // If RenameAs is equal to RegID, then RegID is subject to register renaming
+ // and false dependencies on RegID are all eliminated.
+
+ // If RenameAs references the invalid register, then we optimistically assume
+ // that it can be renamed. In the absence of tablegen descriptors for register
+ // files, RenameAs is always set to the invalid register ID. In all other
+ // cases, RenameAs must be either equal to RegID, or it must reference a
+ // super-register of RegID.
+
+ // If RenameAs is a super-register of RegID, then a write to RegID has always
+ // a false dependency on RenameAs. The only exception is for when the write
+ // implicitly clears the upper portion of the underlying register.
+ // If a write clears its super-registers, then it is renamed as `RenameAs`.
+ bool IsWriteZero = WS.isWriteZero();
+ bool IsEliminated = WS.isEliminated();
+ bool ShouldAllocatePhysRegs = !IsWriteZero && !IsEliminated;
+ const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second;
+ WS.setPRF(RRI.IndexPlusCost.first);
+
+ if (RRI.RenameAs && RRI.RenameAs != RegID) {
+ RegID = RRI.RenameAs;
+ WriteRef &OtherWrite = RegisterMappings[RegID].first;
+
+ if (!WS.clearsSuperRegisters()) {
+ // The processor keeps the definition of `RegID` together with register
+ // `RenameAs`. Since this partial write is not renamed, no physical
+ // register is allocated.
+ ShouldAllocatePhysRegs = false;
+
+ WriteState *OtherWS = OtherWrite.getWriteState();
+ if (OtherWS && (OtherWrite.getSourceIndex() != Write.getSourceIndex())) {
+ // This partial write has a false dependency on RenameAs.
+ assert(!IsEliminated && "Unexpected partial update!");
+ OtherWS->addUser(OtherWrite.getSourceIndex(), &WS);
+ }
+ }
+ }
+
+ // Update zero registers.
+ unsigned ZeroRegisterID =
+ WS.clearsSuperRegisters() ? RegID : WS.getRegisterID();
+ if (IsWriteZero) {
+ ZeroRegisters.setBit(ZeroRegisterID);
+ for (MCSubRegIterator I(ZeroRegisterID, &MRI); I.isValid(); ++I)
+ ZeroRegisters.setBit(*I);
+ } else {
+ ZeroRegisters.clearBit(ZeroRegisterID);
+ for (MCSubRegIterator I(ZeroRegisterID, &MRI); I.isValid(); ++I)
+ ZeroRegisters.clearBit(*I);
+ }
+
+  // If this move has been eliminated, then the call to tryEliminateMove
+ // should have already updated all the register mappings.
+ if (!IsEliminated) {
+ // Update the mapping for register RegID including its sub-registers.
+ RegisterMappings[RegID].first = Write;
+ RegisterMappings[RegID].second.AliasRegID = 0U;
+ for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) {
+ RegisterMappings[*I].first = Write;
+ RegisterMappings[*I].second.AliasRegID = 0U;
+ }
+
+ // No physical registers are allocated for instructions that are optimized
+ // in hardware. For example, zero-latency data-dependency breaking
+ // instructions don't consume physical registers.
+ if (ShouldAllocatePhysRegs)
+ allocatePhysRegs(RegisterMappings[RegID].second, UsedPhysRegs);
+ }
+
+ if (!WS.clearsSuperRegisters())
+ return;
+
+ for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) {
+ if (!IsEliminated) {
+ RegisterMappings[*I].first = Write;
+ RegisterMappings[*I].second.AliasRegID = 0U;
+ }
+
+ if (IsWriteZero)
+ ZeroRegisters.setBit(*I);
+ else
+ ZeroRegisters.clearBit(*I);
+ }
+}
+
+void RegisterFile::removeRegisterWrite(
+ const WriteState &WS, MutableArrayRef<unsigned> FreedPhysRegs) {
+ // Early exit if this write was eliminated. A write eliminated at register
+ // renaming stage generates an alias, and it is not added to the PRF.
+ if (WS.isEliminated())
+ return;
+
+ unsigned RegID = WS.getRegisterID();
+
+ assert(RegID != 0 && "Invalidating an already invalid register?");
+ assert(WS.getCyclesLeft() != UNKNOWN_CYCLES &&
+ "Invalidating a write of unknown cycles!");
+ assert(WS.getCyclesLeft() <= 0 && "Invalid cycles left for this write!");
+
+ bool ShouldFreePhysRegs = !WS.isWriteZero();
+ unsigned RenameAs = RegisterMappings[RegID].second.RenameAs;
+ if (RenameAs && RenameAs != RegID) {
+ RegID = RenameAs;
+
+ if (!WS.clearsSuperRegisters()) {
+ // Keep the definition of `RegID` together with register `RenameAs`.
+ ShouldFreePhysRegs = false;
+ }
+ }
+
+ if (ShouldFreePhysRegs)
+ freePhysRegs(RegisterMappings[RegID].second, FreedPhysRegs);
+
+ WriteRef &WR = RegisterMappings[RegID].first;
+ if (WR.getWriteState() == &WS)
+ WR.invalidate();
+
+ for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) {
+ WriteRef &OtherWR = RegisterMappings[*I].first;
+ if (OtherWR.getWriteState() == &WS)
+ OtherWR.invalidate();
+ }
+
+ if (!WS.clearsSuperRegisters())
+ return;
+
+ for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) {
+ WriteRef &OtherWR = RegisterMappings[*I].first;
+ if (OtherWR.getWriteState() == &WS)
+ OtherWR.invalidate();
+ }
+}
+
+bool RegisterFile::tryEliminateMove(WriteState &WS, ReadState &RS) {
+ const RegisterMapping &RMFrom = RegisterMappings[RS.getRegisterID()];
+ const RegisterMapping &RMTo = RegisterMappings[WS.getRegisterID()];
+
+ // From and To must be owned by the same PRF.
+ const RegisterRenamingInfo &RRIFrom = RMFrom.second;
+ const RegisterRenamingInfo &RRITo = RMTo.second;
+ unsigned RegisterFileIndex = RRIFrom.IndexPlusCost.first;
+ if (RegisterFileIndex != RRITo.IndexPlusCost.first)
+ return false;
+
+ // We only allow move elimination for writes that update a full physical
+ // register. On X86, move elimination is possible with 32-bit general purpose
+ // registers because writes to those registers are not partial writes. If a
+ // register move is a partial write, then we conservatively assume that move
+ // elimination fails, since it would either trigger a partial update, or the
+ // issue of a merge opcode.
+ //
+ // Note that this constraint may be lifted in future. For example, we could
+ // make this model more flexible, and let users customize the set of registers
+ // (i.e. register classes) that allow move elimination.
+ //
+ // For now, we assume that there is a strong correlation between registers
+ // that allow move elimination, and how those same registers are renamed in
+ // hardware.
+ if (RRITo.RenameAs && RRITo.RenameAs != WS.getRegisterID()) {
+ // Early exit if the PRF doesn't support move elimination for this register.
+ if (!RegisterMappings[RRITo.RenameAs].second.AllowMoveElimination)
+ return false;
+ if (!WS.clearsSuperRegisters())
+ return false;
+ }
+
+ RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex];
+ if (RMT.MaxMoveEliminatedPerCycle &&
+ RMT.NumMoveEliminated == RMT.MaxMoveEliminatedPerCycle)
+ return false;
+
+ bool IsZeroMove = ZeroRegisters[RS.getRegisterID()];
+ if (RMT.AllowZeroMoveEliminationOnly && !IsZeroMove)
+ return false;
+
+ // Construct an alias.
+ MCPhysReg AliasedReg =
+ RRIFrom.RenameAs ? RRIFrom.RenameAs : RS.getRegisterID();
+ MCPhysReg AliasReg = RRITo.RenameAs ? RRITo.RenameAs : WS.getRegisterID();
+
+ const RegisterRenamingInfo &RMAlias = RegisterMappings[AliasedReg].second;
+ if (RMAlias.AliasRegID)
+ AliasedReg = RMAlias.AliasRegID;
+
+ RegisterMappings[AliasReg].second.AliasRegID = AliasedReg;
+ for (MCSubRegIterator I(AliasReg, &MRI); I.isValid(); ++I)
+ RegisterMappings[*I].second.AliasRegID = AliasedReg;
+
+ if (IsZeroMove) {
+ WS.setWriteZero();
+ RS.setReadZero();
+ }
+ WS.setEliminated();
+ RMT.NumMoveEliminated++;
+
+ return true;
+}
+
+void RegisterFile::collectWrites(const ReadState &RS,
+ SmallVectorImpl<WriteRef> &Writes) const {
+ unsigned RegID = RS.getRegisterID();
+ assert(RegID && RegID < RegisterMappings.size());
+ LLVM_DEBUG(dbgs() << "RegisterFile: collecting writes for register "
+ << MRI.getName(RegID) << '\n');
+
+ // Check if this is an alias.
+ const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second;
+ if (RRI.AliasRegID)
+ RegID = RRI.AliasRegID;
+
+ const WriteRef &WR = RegisterMappings[RegID].first;
+ if (WR.isValid())
+ Writes.push_back(WR);
+
+ // Handle potential partial register updates.
+ for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) {
+ const WriteRef &WR = RegisterMappings[*I].first;
+ if (WR.isValid())
+ Writes.push_back(WR);
+ }
+
+ // Remove duplicate entries and resize the input vector.
+ if (Writes.size() > 1) {
+ sort(Writes, [](const WriteRef &Lhs, const WriteRef &Rhs) {
+ return Lhs.getWriteState() < Rhs.getWriteState();
+ });
+ auto It = std::unique(Writes.begin(), Writes.end());
+ Writes.resize(std::distance(Writes.begin(), It));
+ }
+
+ LLVM_DEBUG({
+ for (const WriteRef &WR : Writes) {
+ const WriteState &WS = *WR.getWriteState();
+ dbgs() << "[PRF] Found a dependent use of Register "
+ << MRI.getName(WS.getRegisterID()) << " (defined by instruction #"
+ << WR.getSourceIndex() << ")\n";
+ }
+ });
+}
+
+void RegisterFile::addRegisterRead(ReadState &RS,
+ const MCSubtargetInfo &STI) const {
+ unsigned RegID = RS.getRegisterID();
+ const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second;
+ RS.setPRF(RRI.IndexPlusCost.first);
+ if (RS.isIndependentFromDef())
+ return;
+
+ if (ZeroRegisters[RS.getRegisterID()])
+ RS.setReadZero();
+
+ SmallVector<WriteRef, 4> DependentWrites;
+ collectWrites(RS, DependentWrites);
+ RS.setDependentWrites(DependentWrites.size());
+
+ // We know that this read depends on all the writes in DependentWrites.
+ // For each write, check if we have ReadAdvance information, and use it
+ // to figure out in how many cycles this read becomes available.
+ const ReadDescriptor &RD = RS.getDescriptor();
+ const MCSchedModel &SM = STI.getSchedModel();
+ const MCSchedClassDesc *SC = SM.getSchedClassDesc(RD.SchedClassID);
+ for (WriteRef &WR : DependentWrites) {
+ WriteState &WS = *WR.getWriteState();
+ unsigned WriteResID = WS.getWriteResourceID();
+ int ReadAdvance = STI.getReadAdvanceCycles(SC, RD.UseIndex, WriteResID);
+ WS.addUser(WR.getSourceIndex(), &RS, ReadAdvance);
+ }
+}
+
+unsigned RegisterFile::isAvailable(ArrayRef<unsigned> Regs) const {
+ SmallVector<unsigned, 4> NumPhysRegs(getNumRegisterFiles());
+
+ // Find how many new mappings must be created for each register file.
+ for (const unsigned RegID : Regs) {
+ const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second;
+ const IndexPlusCostPairTy &Entry = RRI.IndexPlusCost;
+ if (Entry.first)
+ NumPhysRegs[Entry.first] += Entry.second;
+ NumPhysRegs[0] += Entry.second;
+ }
+
+ unsigned Response = 0;
+ for (unsigned I = 0, E = getNumRegisterFiles(); I < E; ++I) {
+ unsigned NumRegs = NumPhysRegs[I];
+ if (!NumRegs)
+ continue;
+
+ const RegisterMappingTracker &RMT = RegisterFiles[I];
+ if (!RMT.NumPhysRegs) {
+ // The register file has an unbounded number of microarchitectural
+ // registers.
+ continue;
+ }
+
+ if (RMT.NumPhysRegs < NumRegs) {
+ // The current register file is too small. This may occur if the number of
+ // microarchitectural registers in register file #0 was changed by the
+      // users via flag -reg-file-size. Alternatively, the scheduling model
+      // specified too small a number of registers for this register file.
+ LLVM_DEBUG(dbgs() << "Not enough registers in the register file.\n");
+
+ // FIXME: Normalize the instruction register count to match the
+ // NumPhysRegs value. This is a highly unusual case, and is not expected
+ // to occur. This normalization is hiding an inconsistency in either the
+ // scheduling model or in the value that the user might have specified
+ // for NumPhysRegs.
+ NumRegs = RMT.NumPhysRegs;
+ }
+
+ if (RMT.NumPhysRegs < (RMT.NumUsedPhysRegs + NumRegs))
+ Response |= (1U << I);
+ }
+
+ return Response;
+}
+
+#ifndef NDEBUG
+void RegisterFile::dump() const {
+ for (unsigned I = 0, E = MRI.getNumRegs(); I < E; ++I) {
+ const RegisterMapping &RM = RegisterMappings[I];
+ const RegisterRenamingInfo &RRI = RM.second;
+ if (ZeroRegisters[I]) {
+ dbgs() << MRI.getName(I) << ", " << I
+ << ", PRF=" << RRI.IndexPlusCost.first
+ << ", Cost=" << RRI.IndexPlusCost.second
+ << ", RenameAs=" << RRI.RenameAs << ", IsZero=" << ZeroRegisters[I]
+ << ",";
+ RM.first.dump();
+ dbgs() << '\n';
+ }
+ }
+
+ for (unsigned I = 0, E = getNumRegisterFiles(); I < E; ++I) {
+ dbgs() << "Register File #" << I;
+ const RegisterMappingTracker &RMT = RegisterFiles[I];
+ dbgs() << "\n TotalMappings: " << RMT.NumPhysRegs
+ << "\n NumUsedMappings: " << RMT.NumUsedPhysRegs << '\n';
+ }
+}
+#endif
+
+} // namespace mca
+} // namespace llvm
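RegisterFile::isAvailable above answers the question "can all of these definitions be renamed this cycle?" with a bitmask in which bit I is set when register file I lacks enough free physical registers. A simplified standalone model of that accounting follows; PRFModel and the capacities used in main are made up for illustration.

#include <cstdio>
#include <vector>

// Each register file has a capacity and a count of used physical registers;
// a request asks for N new mappings per file and gets back a bitmask of the
// files that cannot satisfy it. File 0 models the unbounded default file.
struct PRFModel {
  struct Tracker { unsigned NumPhysRegs; unsigned NumUsedPhysRegs; };
  std::vector<Tracker> Files;

  unsigned isAvailable(const std::vector<unsigned> &Request) const {
    unsigned Response = 0;
    for (unsigned I = 0, E = Files.size(); I < E; ++I) {
      unsigned NumRegs = Request[I];
      if (!NumRegs)
        continue;
      const Tracker &RMT = Files[I];
      if (!RMT.NumPhysRegs)
        continue; // unbounded register file: never a bottleneck
      if (RMT.NumPhysRegs < RMT.NumUsedPhysRegs + NumRegs)
        Response |= (1U << I); // not enough free registers in file I
    }
    return Response;
  }
};

int main() {
  // File 0: unbounded. File 1: 4 regs, 3 in use. File 2: 8 regs, 2 in use.
  PRFModel M{{{0, 0}, {4, 3}, {8, 2}}};
  // Ask for 2 mappings in file 1 and 1 mapping in file 2.
  std::printf("busy mask = 0x%x\n", M.isAvailable({3, 2, 1})); // prints 0x2
}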
diff --git a/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp b/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
new file mode 100644
index 000000000000..06f2476353d6
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
@@ -0,0 +1,353 @@
+//===--------------------- ResourceManager.cpp ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// The classes here represent processor resource units and their management
+/// strategy. These classes are managed by the Scheduler.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/HardwareUnits/ResourceManager.h"
+#include "llvm/MCA/Support.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace mca {
+
+#define DEBUG_TYPE "llvm-mca"
+ResourceStrategy::~ResourceStrategy() = default;
+
+static uint64_t selectImpl(uint64_t CandidateMask,
+ uint64_t &NextInSequenceMask) {
+ // The upper bit set in CandidateMask identifies our next candidate resource.
+ CandidateMask = 1ULL << getResourceStateIndex(CandidateMask);
+ NextInSequenceMask &= (CandidateMask | (CandidateMask - 1));
+ return CandidateMask;
+}
+
+uint64_t DefaultResourceStrategy::select(uint64_t ReadyMask) {
+ // This method assumes that ReadyMask cannot be zero.
+ uint64_t CandidateMask = ReadyMask & NextInSequenceMask;
+ if (CandidateMask)
+ return selectImpl(CandidateMask, NextInSequenceMask);
+
+ NextInSequenceMask = ResourceUnitMask ^ RemovedFromNextInSequence;
+ RemovedFromNextInSequence = 0;
+ CandidateMask = ReadyMask & NextInSequenceMask;
+ if (CandidateMask)
+ return selectImpl(CandidateMask, NextInSequenceMask);
+
+ NextInSequenceMask = ResourceUnitMask;
+ CandidateMask = ReadyMask & NextInSequenceMask;
+ return selectImpl(CandidateMask, NextInSequenceMask);
+}
+
+void DefaultResourceStrategy::used(uint64_t Mask) {
+ if (Mask > NextInSequenceMask) {
+ RemovedFromNextInSequence |= Mask;
+ return;
+ }
+
+ NextInSequenceMask &= (~Mask);
+ if (NextInSequenceMask)
+ return;
+
+ NextInSequenceMask = ResourceUnitMask ^ RemovedFromNextInSequence;
+ RemovedFromNextInSequence = 0;
+}
+
+ResourceState::ResourceState(const MCProcResourceDesc &Desc, unsigned Index,
+ uint64_t Mask)
+ : ProcResourceDescIndex(Index), ResourceMask(Mask),
+ BufferSize(Desc.BufferSize), IsAGroup(countPopulation(ResourceMask) > 1) {
+ if (IsAGroup) {
+ ResourceSizeMask =
+ ResourceMask ^ 1ULL << getResourceStateIndex(ResourceMask);
+ } else {
+ ResourceSizeMask = (1ULL << Desc.NumUnits) - 1;
+ }
+ ReadyMask = ResourceSizeMask;
+ AvailableSlots = BufferSize == -1 ? 0U : static_cast<unsigned>(BufferSize);
+ Unavailable = false;
+}
+
+bool ResourceState::isReady(unsigned NumUnits) const {
+ return (!isReserved() || isADispatchHazard()) &&
+ countPopulation(ReadyMask) >= NumUnits;
+}
+
+ResourceStateEvent ResourceState::isBufferAvailable() const {
+ if (isADispatchHazard() && isReserved())
+ return RS_RESERVED;
+ if (!isBuffered() || AvailableSlots)
+ return RS_BUFFER_AVAILABLE;
+ return RS_BUFFER_UNAVAILABLE;
+}
+
+#ifndef NDEBUG
+void ResourceState::dump() const {
+ dbgs() << "MASK=" << format_hex(ResourceMask, 16)
+ << ", SZMASK=" << format_hex(ResourceSizeMask, 16)
+ << ", RDYMASK=" << format_hex(ReadyMask, 16)
+ << ", BufferSize=" << BufferSize
+ << ", AvailableSlots=" << AvailableSlots
+ << ", Reserved=" << Unavailable << '\n';
+}
+#endif
+
+static std::unique_ptr<ResourceStrategy>
+getStrategyFor(const ResourceState &RS) {
+ if (RS.isAResourceGroup() || RS.getNumUnits() > 1)
+ return llvm::make_unique<DefaultResourceStrategy>(RS.getReadyMask());
+ return std::unique_ptr<ResourceStrategy>(nullptr);
+}
+
+ResourceManager::ResourceManager(const MCSchedModel &SM)
+ : Resources(SM.getNumProcResourceKinds() - 1),
+ Strategies(SM.getNumProcResourceKinds() - 1),
+ Resource2Groups(SM.getNumProcResourceKinds() - 1, 0),
+ ProcResID2Mask(SM.getNumProcResourceKinds(), 0),
+ ResIndex2ProcResID(SM.getNumProcResourceKinds() - 1, 0),
+ ProcResUnitMask(0), ReservedResourceGroups(0) {
+ computeProcResourceMasks(SM, ProcResID2Mask);
+
+ // initialize vector ResIndex2ProcResID.
+ for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ unsigned Index = getResourceStateIndex(ProcResID2Mask[I]);
+ ResIndex2ProcResID[Index] = I;
+ }
+
+ for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ uint64_t Mask = ProcResID2Mask[I];
+ unsigned Index = getResourceStateIndex(Mask);
+ Resources[Index] =
+ llvm::make_unique<ResourceState>(*SM.getProcResource(I), I, Mask);
+ Strategies[Index] = getStrategyFor(*Resources[Index]);
+ }
+
+ for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ uint64_t Mask = ProcResID2Mask[I];
+ unsigned Index = getResourceStateIndex(Mask);
+ const ResourceState &RS = *Resources[Index];
+ if (!RS.isAResourceGroup()) {
+ ProcResUnitMask |= Mask;
+ continue;
+ }
+
+ uint64_t GroupMaskIdx = 1ULL << Index;
+ Mask -= GroupMaskIdx;
+ while (Mask) {
+ // Extract lowest set isolated bit.
+ uint64_t Unit = Mask & (-Mask);
+ unsigned IndexUnit = getResourceStateIndex(Unit);
+ Resource2Groups[IndexUnit] |= GroupMaskIdx;
+ Mask ^= Unit;
+ }
+ }
+
+ AvailableProcResUnits = ProcResUnitMask;
+}
+
+void ResourceManager::setCustomStrategyImpl(std::unique_ptr<ResourceStrategy> S,
+ uint64_t ResourceMask) {
+ unsigned Index = getResourceStateIndex(ResourceMask);
+ assert(Index < Resources.size() && "Invalid processor resource index!");
+ assert(S && "Unexpected null strategy in input!");
+ Strategies[Index] = std::move(S);
+}
+
+unsigned ResourceManager::resolveResourceMask(uint64_t Mask) const {
+ return ResIndex2ProcResID[getResourceStateIndex(Mask)];
+}
+
+unsigned ResourceManager::getNumUnits(uint64_t ResourceID) const {
+ return Resources[getResourceStateIndex(ResourceID)]->getNumUnits();
+}
+
+// Returns the actual resource consumed by this Use, as a pair: the first
+// member is the primary resource ID, the second is the specific sub-resource ID.
+ResourceRef ResourceManager::selectPipe(uint64_t ResourceID) {
+ unsigned Index = getResourceStateIndex(ResourceID);
+ assert(Index < Resources.size() && "Invalid resource use!");
+ ResourceState &RS = *Resources[Index];
+ assert(RS.isReady() && "No available units to select!");
+
+ // Special case where RS is not a group, and it only declares a single
+ // resource unit.
+ if (!RS.isAResourceGroup() && RS.getNumUnits() == 1)
+ return std::make_pair(ResourceID, RS.getReadyMask());
+
+ uint64_t SubResourceID = Strategies[Index]->select(RS.getReadyMask());
+ if (RS.isAResourceGroup())
+ return selectPipe(SubResourceID);
+ return std::make_pair(ResourceID, SubResourceID);
+}
+
+void ResourceManager::use(const ResourceRef &RR) {
+ // Mark the sub-resource referenced by RR as used.
+ unsigned RSID = getResourceStateIndex(RR.first);
+ ResourceState &RS = *Resources[RSID];
+ RS.markSubResourceAsUsed(RR.second);
+ // Remember to update the resource strategy for non-group resources with
+ // multiple units.
+ if (RS.getNumUnits() > 1)
+ Strategies[RSID]->used(RR.second);
+
+ // If there are still available units in RR.first,
+ // then we are done.
+ if (RS.isReady())
+ return;
+
+ AvailableProcResUnits ^= RR.first;
+
+ // Notify groups that RR.first is no longer available.
+ uint64_t Users = Resource2Groups[RSID];
+ while (Users) {
+ // Extract lowest set isolated bit.
+ unsigned GroupIndex = getResourceStateIndex(Users & (-Users));
+ ResourceState &CurrentUser = *Resources[GroupIndex];
+ CurrentUser.markSubResourceAsUsed(RR.first);
+ Strategies[GroupIndex]->used(RR.first);
+ // Reset lowest set bit.
+ Users &= Users - 1;
+ }
+}
+
+void ResourceManager::release(const ResourceRef &RR) {
+ unsigned RSID = getResourceStateIndex(RR.first);
+ ResourceState &RS = *Resources[RSID];
+ bool WasFullyUsed = !RS.isReady();
+ RS.releaseSubResource(RR.second);
+ if (!WasFullyUsed)
+ return;
+
+ AvailableProcResUnits ^= RR.first;
+
+ // Notify groups that RR.first is now available again.
+ uint64_t Users = Resource2Groups[RSID];
+ while (Users) {
+ unsigned GroupIndex = getResourceStateIndex(Users & (-Users));
+ ResourceState &CurrentUser = *Resources[GroupIndex];
+ CurrentUser.releaseSubResource(RR.first);
+ Users &= Users - 1;
+ }
+}
+
+ResourceStateEvent
+ResourceManager::canBeDispatched(ArrayRef<uint64_t> Buffers) const {
+ ResourceStateEvent Result = ResourceStateEvent::RS_BUFFER_AVAILABLE;
+ for (uint64_t Buffer : Buffers) {
+ ResourceState &RS = *Resources[getResourceStateIndex(Buffer)];
+ Result = RS.isBufferAvailable();
+ if (Result != ResourceStateEvent::RS_BUFFER_AVAILABLE)
+ break;
+ }
+ return Result;
+}
+
+void ResourceManager::reserveBuffers(ArrayRef<uint64_t> Buffers) {
+ for (const uint64_t Buffer : Buffers) {
+ ResourceState &RS = *Resources[getResourceStateIndex(Buffer)];
+ assert(RS.isBufferAvailable() == ResourceStateEvent::RS_BUFFER_AVAILABLE);
+ RS.reserveBuffer();
+
+ if (RS.isADispatchHazard()) {
+ assert(!RS.isReserved());
+ RS.setReserved();
+ }
+ }
+}
+
+void ResourceManager::releaseBuffers(ArrayRef<uint64_t> Buffers) {
+ for (const uint64_t R : Buffers)
+ Resources[getResourceStateIndex(R)]->releaseBuffer();
+}
+
+uint64_t ResourceManager::checkAvailability(const InstrDesc &Desc) const {
+ uint64_t BusyResourceMask = 0;
+ for (const std::pair<uint64_t, const ResourceUsage> &E : Desc.Resources) {
+ unsigned NumUnits = E.second.isReserved() ? 0U : E.second.NumUnits;
+ unsigned Index = getResourceStateIndex(E.first);
+ if (!Resources[Index]->isReady(NumUnits))
+ BusyResourceMask |= E.first;
+ }
+
+ BusyResourceMask &= ProcResUnitMask;
+ if (BusyResourceMask)
+ return BusyResourceMask;
+ return Desc.UsedProcResGroups & ReservedResourceGroups;
+}
+
+void ResourceManager::issueInstruction(
+ const InstrDesc &Desc,
+ SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &Pipes) {
+ for (const std::pair<uint64_t, ResourceUsage> &R : Desc.Resources) {
+ const CycleSegment &CS = R.second.CS;
+ if (!CS.size()) {
+ releaseResource(R.first);
+ continue;
+ }
+
+ assert(CS.begin() == 0 && "Invalid {Start, End} cycles!");
+ if (!R.second.isReserved()) {
+ ResourceRef Pipe = selectPipe(R.first);
+ use(Pipe);
+ BusyResources[Pipe] += CS.size();
+ Pipes.emplace_back(std::pair<ResourceRef, ResourceCycles>(
+ Pipe, ResourceCycles(CS.size())));
+ } else {
+ assert((countPopulation(R.first) > 1) && "Expected a group!");
+ // Mark this group as reserved.
+ assert(R.second.isReserved());
+ reserveResource(R.first);
+ BusyResources[ResourceRef(R.first, R.first)] += CS.size();
+ }
+ }
+}
+
+void ResourceManager::cycleEvent(SmallVectorImpl<ResourceRef> &ResourcesFreed) {
+ for (std::pair<ResourceRef, unsigned> &BR : BusyResources) {
+ if (BR.second)
+ BR.second--;
+ if (!BR.second) {
+ // Release this resource.
+ const ResourceRef &RR = BR.first;
+
+ if (countPopulation(RR.first) == 1)
+ release(RR);
+
+ releaseResource(RR.first);
+ ResourcesFreed.push_back(RR);
+ }
+ }
+
+ for (const ResourceRef &RF : ResourcesFreed)
+ BusyResources.erase(RF);
+}
+
+void ResourceManager::reserveResource(uint64_t ResourceID) {
+ const unsigned Index = getResourceStateIndex(ResourceID);
+ ResourceState &Resource = *Resources[Index];
+ assert(Resource.isAResourceGroup() && !Resource.isReserved() &&
+ "Unexpected resource found!");
+ Resource.setReserved();
+ ReservedResourceGroups ^= 1ULL << Index;
+}
+
+void ResourceManager::releaseResource(uint64_t ResourceID) {
+ const unsigned Index = getResourceStateIndex(ResourceID);
+ ResourceState &Resource = *Resources[Index];
+ Resource.clearReserved();
+ if (Resource.isAResourceGroup())
+ ReservedResourceGroups ^= 1ULL << Index;
+}
+
+} // namespace mca
+} // namespace llvm
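DefaultResourceStrategy::select and selectImpl above implement a round-robin pick over resource units encoded as a bitmask: the highest ready unit that is still "next in sequence" is chosen, the sequence mask is clipped to that unit and everything below it, and used() removes the unit so later picks rotate through the rest. The mask arithmetic can be watched in isolation with plain integers; this is a standalone sketch, with the wrap-around handling simplified to a plain reset.

#include <cstdint>
#include <cstdio>

// Standalone copy of the round-robin selection logic used by
// DefaultResourceStrategy above, operating on plain bitmasks.
static unsigned msbIndex(uint64_t Mask) {
  unsigned Index = 0;
  while (Mask >>= 1)
    ++Index;
  return Index; // index of the highest set bit (Mask must be non-zero)
}

static uint64_t selectImpl(uint64_t CandidateMask, uint64_t &NextInSequenceMask) {
  // Pick the highest candidate unit, then clip the sequence mask to that
  // unit and everything below it, so later picks rotate downwards.
  CandidateMask = 1ULL << msbIndex(CandidateMask);
  NextInSequenceMask &= (CandidateMask | (CandidateMask - 1));
  return CandidateMask;
}

int main() {
  const uint64_t ResourceUnitMask = 0b111; // three units: bits 0, 1 and 2
  uint64_t NextInSequenceMask = ResourceUnitMask;

  for (int I = 0; I < 6; ++I) {
    uint64_t Ready = ResourceUnitMask; // pretend every unit is ready
    uint64_t Candidate = Ready & NextInSequenceMask;
    if (!Candidate) { // sequence exhausted: restart it (simplified wrap-around)
      NextInSequenceMask = ResourceUnitMask;
      Candidate = Ready & NextInSequenceMask;
    }
    uint64_t Picked = selectImpl(Candidate, NextInSequenceMask);
    NextInSequenceMask &= ~Picked; // like used(): drop the unit just picked
    std::printf("pick %d -> unit mask 0x%llx\n", I,
                (unsigned long long)Picked); // prints 4, 2, 1, 4, 2, 1
  }
}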
diff --git a/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp b/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp
new file mode 100644
index 000000000000..068c5062ccdf
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp
@@ -0,0 +1,87 @@
+//===---------------------- RetireControlUnit.cpp ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file simulates the hardware responsible for retiring instructions.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/HardwareUnits/RetireControlUnit.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "llvm-mca"
+
+namespace llvm {
+namespace mca {
+
+RetireControlUnit::RetireControlUnit(const MCSchedModel &SM)
+ : NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0),
+ AvailableSlots(SM.MicroOpBufferSize), MaxRetirePerCycle(0) {
+ // Check if the scheduling model provides extra information about the machine
+ // processor. If so, then use that information to set the reorder buffer size
+ // and the maximum number of instructions retired per cycle.
+ if (SM.hasExtraProcessorInfo()) {
+ const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo();
+ if (EPI.ReorderBufferSize)
+ AvailableSlots = EPI.ReorderBufferSize;
+ MaxRetirePerCycle = EPI.MaxRetirePerCycle;
+ }
+
+ assert(AvailableSlots && "Invalid reorder buffer size!");
+ Queue.resize(AvailableSlots);
+}
+
+// Reserves a number of slots, and returns a new token.
+unsigned RetireControlUnit::reserveSlot(const InstRef &IR,
+ unsigned NumMicroOps) {
+ assert(isAvailable(NumMicroOps) && "Reorder Buffer unavailable!");
+ unsigned NormalizedQuantity =
+ std::min(NumMicroOps, static_cast<unsigned>(Queue.size()));
+ // Zero latency instructions may have zero uOps. Artificially bump this
+ // value to 1. Although zero latency instructions don't consume scheduler
+ // resources, they still consume one slot in the retire queue.
+ NormalizedQuantity = std::max(NormalizedQuantity, 1U);
+ unsigned TokenID = NextAvailableSlotIdx;
+ Queue[NextAvailableSlotIdx] = {IR, NormalizedQuantity, false};
+ NextAvailableSlotIdx += NormalizedQuantity;
+ NextAvailableSlotIdx %= Queue.size();
+ AvailableSlots -= NormalizedQuantity;
+ return TokenID;
+}
+
+const RetireControlUnit::RUToken &RetireControlUnit::peekCurrentToken() const {
+ return Queue[CurrentInstructionSlotIdx];
+}
+
+void RetireControlUnit::consumeCurrentToken() {
+ RetireControlUnit::RUToken &Current = Queue[CurrentInstructionSlotIdx];
+ assert(Current.NumSlots && "Reserved zero slots?");
+ assert(Current.IR && "Invalid RUToken in the RCU queue.");
+ Current.IR.getInstruction()->retire();
+
+ // Update the slot index to be the next item in the circular queue.
+ CurrentInstructionSlotIdx += Current.NumSlots;
+ CurrentInstructionSlotIdx %= Queue.size();
+ AvailableSlots += Current.NumSlots;
+}
+
+void RetireControlUnit::onInstructionExecuted(unsigned TokenID) {
+ assert(Queue.size() > TokenID);
+ assert(Queue[TokenID].Executed == false && Queue[TokenID].IR);
+ Queue[TokenID].Executed = true;
+}
+
+#ifndef NDEBUG
+void RetireControlUnit::dump() const {
+ dbgs() << "Retire Unit: { Total Slots=" << Queue.size()
+ << ", Available Slots=" << AvailableSlots << " }\n";
+}
+#endif
+
+} // namespace mca
+} // namespace llvm
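RetireControlUnit above treats the reorder buffer as a circular array: reserveSlot returns the current write index as a token, advances that index modulo the queue size by the normalized number of micro-ops, and consumeCurrentToken frees slots strictly in program order. A toy version of the same index arithmetic follows; ToyROB is an illustrative name and token bookkeeping is reduced to slot counts.

#include <algorithm>
#include <cassert>
#include <cstdio>
#include <vector>

// Toy circular reorder buffer mirroring the index arithmetic of
// RetireControlUnit above. Each token records how many slots it spans.
struct ToyROB {
  struct Token { unsigned NumSlots = 0; };
  std::vector<Token> Queue;
  unsigned NextAvailableSlotIdx = 0;
  unsigned CurrentInstructionSlotIdx = 0;
  unsigned AvailableSlots;

  explicit ToyROB(unsigned Size) : Queue(Size), AvailableSlots(Size) {}

  unsigned reserveSlot(unsigned NumMicroOps) {
    // Zero-uop instructions still take one slot; huge ones are clamped.
    unsigned Quantity =
        std::max(1U, std::min(NumMicroOps, static_cast<unsigned>(Queue.size())));
    assert(AvailableSlots >= Quantity && "reorder buffer is full");
    unsigned TokenID = NextAvailableSlotIdx;
    Queue[TokenID] = {Quantity};
    NextAvailableSlotIdx = (NextAvailableSlotIdx + Quantity) % Queue.size();
    AvailableSlots -= Quantity;
    return TokenID;
  }

  void retireCurrent() {
    // Slots are released strictly in program order, like consumeCurrentToken.
    Token &T = Queue[CurrentInstructionSlotIdx];
    CurrentInstructionSlotIdx =
        (CurrentInstructionSlotIdx + T.NumSlots) % Queue.size();
    AvailableSlots += T.NumSlots;
  }
};

int main() {
  ToyROB ROB(8);
  unsigned A = ROB.reserveSlot(3); // token 0, occupies slots 0..2
  unsigned B = ROB.reserveSlot(2); // token 3, occupies slots 3..4
  ROB.retireCurrent();             // frees token A's three slots
  std::printf("tokens %u and %u, free slots: %u\n", A, B, ROB.AvailableSlots); // 0, 3, 6
}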
diff --git a/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/Scheduler.cpp b/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/Scheduler.cpp
new file mode 100644
index 000000000000..0f0f2ffb8325
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/Scheduler.cpp
@@ -0,0 +1,343 @@
+//===--------------------- Scheduler.cpp ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// A scheduler for processor resource units and processor resource groups.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/HardwareUnits/Scheduler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace mca {
+
+#define DEBUG_TYPE "llvm-mca"
+
+void Scheduler::initializeStrategy(std::unique_ptr<SchedulerStrategy> S) {
+ // Ensure we have a valid (non-null) strategy object.
+ Strategy = S ? std::move(S) : llvm::make_unique<DefaultSchedulerStrategy>();
+}
+
+// Anchor the vtable of SchedulerStrategy and DefaultSchedulerStrategy.
+SchedulerStrategy::~SchedulerStrategy() = default;
+DefaultSchedulerStrategy::~DefaultSchedulerStrategy() = default;
+
+#ifndef NDEBUG
+void Scheduler::dump() const {
+ dbgs() << "[SCHEDULER]: WaitSet size is: " << WaitSet.size() << '\n';
+ dbgs() << "[SCHEDULER]: ReadySet size is: " << ReadySet.size() << '\n';
+ dbgs() << "[SCHEDULER]: IssuedSet size is: " << IssuedSet.size() << '\n';
+ Resources->dump();
+}
+#endif
+
+Scheduler::Status Scheduler::isAvailable(const InstRef &IR) {
+ const InstrDesc &Desc = IR.getInstruction()->getDesc();
+
+ ResourceStateEvent RSE = Resources->canBeDispatched(Desc.Buffers);
+ HadTokenStall = RSE != RS_BUFFER_AVAILABLE;
+
+ switch (RSE) {
+ case ResourceStateEvent::RS_BUFFER_UNAVAILABLE:
+ return Scheduler::SC_BUFFERS_FULL;
+ case ResourceStateEvent::RS_RESERVED:
+ return Scheduler::SC_DISPATCH_GROUP_STALL;
+ case ResourceStateEvent::RS_BUFFER_AVAILABLE:
+ break;
+ }
+
+ // Give lower priority to LSUnit stall events.
+ LSUnit::Status LSS = LSU.isAvailable(IR);
+ HadTokenStall = LSS != LSUnit::LSU_AVAILABLE;
+
+ switch (LSS) {
+ case LSUnit::LSU_LQUEUE_FULL:
+ return Scheduler::SC_LOAD_QUEUE_FULL;
+ case LSUnit::LSU_SQUEUE_FULL:
+ return Scheduler::SC_STORE_QUEUE_FULL;
+ case LSUnit::LSU_AVAILABLE:
+ return Scheduler::SC_AVAILABLE;
+ }
+
+ llvm_unreachable("Don't know how to process this LSU state result!");
+}
+
+void Scheduler::issueInstructionImpl(
+ InstRef &IR,
+ SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &UsedResources) {
+ Instruction *IS = IR.getInstruction();
+ const InstrDesc &D = IS->getDesc();
+
+ // Issue the instruction and collect all the consumed resources
+ // into a vector. That vector is then used to notify the listener.
+ Resources->issueInstruction(D, UsedResources);
+
+ // Notify the instruction that it started executing.
+ // This updates the internal state of each write.
+ IS->execute(IR.getSourceIndex());
+
+ IS->computeCriticalRegDep();
+
+ if (IS->isMemOp()) {
+ LSU.onInstructionIssued(IR);
+ const MemoryGroup &Group = LSU.getGroup(IS->getLSUTokenID());
+ IS->setCriticalMemDep(Group.getCriticalPredecessor());
+ }
+
+ if (IS->isExecuting())
+ IssuedSet.emplace_back(IR);
+ else if (IS->isExecuted())
+ LSU.onInstructionExecuted(IR);
+}
+
+// Release the buffered resources and issue the instruction.
+void Scheduler::issueInstruction(
+ InstRef &IR,
+ SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &UsedResources,
+ SmallVectorImpl<InstRef> &PendingInstructions,
+ SmallVectorImpl<InstRef> &ReadyInstructions) {
+ const Instruction &Inst = *IR.getInstruction();
+ bool HasDependentUsers = Inst.hasDependentUsers();
+ HasDependentUsers |= Inst.isMemOp() && LSU.hasDependentUsers(IR);
+
+ Resources->releaseBuffers(Inst.getDesc().Buffers);
+ issueInstructionImpl(IR, UsedResources);
+ // Instructions that have been issued during this cycle might have unblocked
+ // other dependent instructions. Dependent instructions may be issued during
+ // this same cycle if operands have ReadAdvance entries. Promote those
+ // instructions to the ReadySet and notify the caller that those are ready.
+ if (HasDependentUsers)
+ if (promoteToPendingSet(PendingInstructions))
+ promoteToReadySet(ReadyInstructions);
+}
+
+bool Scheduler::promoteToReadySet(SmallVectorImpl<InstRef> &Ready) {
+ // Scan the set of waiting instructions and promote them to the
+ // ready set if operands are all ready.
+ unsigned PromotedElements = 0;
+ for (auto I = PendingSet.begin(), E = PendingSet.end(); I != E;) {
+ InstRef &IR = *I;
+ if (!IR)
+ break;
+
+ // Check if there are unsolved register dependencies.
+ Instruction &IS = *IR.getInstruction();
+ if (!IS.isReady() && !IS.updatePending()) {
+ ++I;
+ continue;
+ }
+ // Check if there are unsolved memory dependencies.
+ if (IS.isMemOp() && !LSU.isReady(IR)) {
+ ++I;
+ continue;
+ }
+
+ LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << IR
+ << " promoted to the READY set.\n");
+
+ Ready.emplace_back(IR);
+ ReadySet.emplace_back(IR);
+
+ IR.invalidate();
+ ++PromotedElements;
+ std::iter_swap(I, E - PromotedElements);
+ }
+
+ PendingSet.resize(PendingSet.size() - PromotedElements);
+ return PromotedElements;
+}
+
+bool Scheduler::promoteToPendingSet(SmallVectorImpl<InstRef> &Pending) {
+ // Scan the set of waiting instructions and promote them to the
+ // pending set if operands are all ready.
+ unsigned RemovedElements = 0;
+ for (auto I = WaitSet.begin(), E = WaitSet.end(); I != E;) {
+ InstRef &IR = *I;
+ if (!IR)
+ break;
+
+    // Check if this instruction is now ready. If so, force a transition in
+    // state using method 'updateDispatched()'.
+ Instruction &IS = *IR.getInstruction();
+ if (IS.isDispatched() && !IS.updateDispatched()) {
+ ++I;
+ continue;
+ }
+
+ if (IS.isMemOp() && LSU.isWaiting(IR)) {
+ ++I;
+ continue;
+ }
+
+ LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << IR
+ << " promoted to the PENDING set.\n");
+
+ Pending.emplace_back(IR);
+ PendingSet.emplace_back(IR);
+
+ IR.invalidate();
+ ++RemovedElements;
+ std::iter_swap(I, E - RemovedElements);
+ }
+
+ WaitSet.resize(WaitSet.size() - RemovedElements);
+ return RemovedElements;
+}
+
+InstRef Scheduler::select() {
+ unsigned QueueIndex = ReadySet.size();
+ for (unsigned I = 0, E = ReadySet.size(); I != E; ++I) {
+ InstRef &IR = ReadySet[I];
+ if (QueueIndex == ReadySet.size() ||
+ Strategy->compare(IR, ReadySet[QueueIndex])) {
+ Instruction &IS = *IR.getInstruction();
+ uint64_t BusyResourceMask = Resources->checkAvailability(IS.getDesc());
+ if (BusyResourceMask)
+ IS.setCriticalResourceMask(BusyResourceMask);
+ BusyResourceUnits |= BusyResourceMask;
+ if (!BusyResourceMask)
+ QueueIndex = I;
+ }
+ }
+
+ if (QueueIndex == ReadySet.size())
+ return InstRef();
+
+ // We found an instruction to issue.
+ InstRef IR = ReadySet[QueueIndex];
+ std::swap(ReadySet[QueueIndex], ReadySet[ReadySet.size() - 1]);
+ ReadySet.pop_back();
+ return IR;
+}
+
+void Scheduler::updateIssuedSet(SmallVectorImpl<InstRef> &Executed) {
+ unsigned RemovedElements = 0;
+ for (auto I = IssuedSet.begin(), E = IssuedSet.end(); I != E;) {
+ InstRef &IR = *I;
+ if (!IR)
+ break;
+ Instruction &IS = *IR.getInstruction();
+ if (!IS.isExecuted()) {
+ LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << IR
+ << " is still executing.\n");
+ ++I;
+ continue;
+ }
+
+ // Instruction IR has completed execution.
+ LSU.onInstructionExecuted(IR);
+ Executed.emplace_back(IR);
+ ++RemovedElements;
+ IR.invalidate();
+ std::iter_swap(I, E - RemovedElements);
+ }
+
+ IssuedSet.resize(IssuedSet.size() - RemovedElements);
+}
+
+uint64_t Scheduler::analyzeResourcePressure(SmallVectorImpl<InstRef> &Insts) {
+ Insts.insert(Insts.end(), ReadySet.begin(), ReadySet.end());
+ return BusyResourceUnits;
+}
+
+void Scheduler::analyzeDataDependencies(SmallVectorImpl<InstRef> &RegDeps,
+ SmallVectorImpl<InstRef> &MemDeps) {
+ const auto EndIt = PendingSet.end() - NumDispatchedToThePendingSet;
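+  // Instructions that were moved to the PendingSet during this same cycle are
+  // excluded: they have not had a full cycle to make progress, so they are
+  // not reported as sources of backpressure.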
+ for (const InstRef &IR : make_range(PendingSet.begin(), EndIt)) {
+ const Instruction &IS = *IR.getInstruction();
+ if (Resources->checkAvailability(IS.getDesc()))
+ continue;
+
+ if (IS.isMemOp() && LSU.isPending(IR))
+ MemDeps.emplace_back(IR);
+
+ if (IS.isPending())
+ RegDeps.emplace_back(IR);
+ }
+}
+
+void Scheduler::cycleEvent(SmallVectorImpl<ResourceRef> &Freed,
+ SmallVectorImpl<InstRef> &Executed,
+ SmallVectorImpl<InstRef> &Pending,
+ SmallVectorImpl<InstRef> &Ready) {
+ LSU.cycleEvent();
+
+ // Release consumed resources.
+ Resources->cycleEvent(Freed);
+
+ for (InstRef &IR : IssuedSet)
+ IR.getInstruction()->cycleEvent();
+ updateIssuedSet(Executed);
+
+ for (InstRef &IR : PendingSet)
+ IR.getInstruction()->cycleEvent();
+
+ for (InstRef &IR : WaitSet)
+ IR.getInstruction()->cycleEvent();
+
+ promoteToPendingSet(Pending);
+ promoteToReadySet(Ready);
+
+ NumDispatchedToThePendingSet = 0;
+ BusyResourceUnits = 0;
+}
+
+bool Scheduler::mustIssueImmediately(const InstRef &IR) const {
+ const InstrDesc &Desc = IR.getInstruction()->getDesc();
+ if (Desc.isZeroLatency())
+ return true;
+  // Instructions that use an in-order dispatch/issue processor resource must
+  // be issued immediately to the pipeline(s). Any other in-order buffered
+  // resource (i.e. BufferSize=1) is simply consumed.
+ return Desc.MustIssueImmediately;
+}
+
+bool Scheduler::dispatch(InstRef &IR) {
+ Instruction &IS = *IR.getInstruction();
+ const InstrDesc &Desc = IS.getDesc();
+ Resources->reserveBuffers(Desc.Buffers);
+
+ // If necessary, reserve queue entries in the load-store unit (LSU).
+ if (IS.isMemOp())
+ IS.setLSUTokenID(LSU.dispatch(IR));
+
+ if (IS.isDispatched() || (IS.isMemOp() && LSU.isWaiting(IR))) {
+ LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the WaitSet\n");
+ WaitSet.push_back(IR);
+ return false;
+ }
+
+ if (IS.isPending() || (IS.isMemOp() && LSU.isPending(IR))) {
+ LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR
+ << " to the PendingSet\n");
+ PendingSet.push_back(IR);
+ ++NumDispatchedToThePendingSet;
+ return false;
+ }
+
+ assert(IS.isReady() && (!IS.isMemOp() || LSU.isReady(IR)) &&
+ "Unexpected internal state found!");
+ // Don't add a zero-latency instruction to the Ready queue.
+ // A zero-latency instruction doesn't consume any scheduler resources. That is
+ // because it doesn't need to be executed, and it is often removed at register
+ // renaming stage. For example, register-register moves are often optimized at
+ // register renaming stage by simply updating register aliases. On some
+ // targets, zero-idiom instructions (for example: a xor that clears the value
+ // of a register) are treated specially, and are often eliminated at register
+ // renaming stage.
+ if (!mustIssueImmediately(IR)) {
+ LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the ReadySet\n");
+ ReadySet.push_back(IR);
+ }
+
+ return true;
+}
+
+} // namespace mca
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/MCA/InstrBuilder.cpp b/contrib/llvm-project/llvm/lib/MCA/InstrBuilder.cpp
new file mode 100644
index 000000000000..829920366c90
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/MCA/InstrBuilder.cpp
@@ -0,0 +1,717 @@
+//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the InstrBuilder interface.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/InstrBuilder.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/WithColor.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "llvm-mca"
+
+namespace llvm {
+namespace mca {
+
+InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti,
+ const llvm::MCInstrInfo &mcii,
+ const llvm::MCRegisterInfo &mri,
+ const llvm::MCInstrAnalysis *mcia)
+ : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), FirstCallInst(true),
+ FirstReturnInst(true) {
+ const MCSchedModel &SM = STI.getSchedModel();
+ ProcResourceMasks.resize(SM.getNumProcResourceKinds());
+ computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);
+}
+
+static void initializeUsedResources(InstrDesc &ID,
+ const MCSchedClassDesc &SCDesc,
+ const MCSubtargetInfo &STI,
+ ArrayRef<uint64_t> ProcResourceMasks) {
+ const MCSchedModel &SM = STI.getSchedModel();
+
+ // Populate resources consumed.
+ using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
+ std::vector<ResourcePlusCycles> Worklist;
+
+ // Track cycles contributed by resources that are in a "Super" relationship.
+ // This is required if we want to correctly match the behavior of method
+ // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set
+ // of "consumed" processor resources and resource cycles, the logic in
+ // ExpandProcResource() doesn't update the number of resource cycles
+ // contributed by a "Super" resource to a group.
+ // We need to take this into account when we find that a processor resource is
+ // part of a group, and it is also used as the "Super" of other resources.
+ // This map stores the number of cycles contributed by sub-resources that are
+ // part of a "Super" resource. The key value is the "Super" resource mask ID.
+ DenseMap<uint64_t, unsigned> SuperResources;
+
+ unsigned NumProcResources = SM.getNumProcResourceKinds();
+ APInt Buffers(NumProcResources, 0);
+
+ bool AllInOrderResources = true;
+ bool AnyDispatchHazards = false;
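+  // A resource with BufferSize=0 introduces a dispatch hazard: micro opcodes
+  // that consume it must be issued in the same cycle they are dispatched. A
+  // BufferSize of at most 1 identifies an in-order (unbuffered) resource.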
+ for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) {
+ const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I;
+ const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx);
+ if (!PRE->Cycles) {
+#ifndef NDEBUG
+ WithColor::warning()
+ << "Ignoring invalid write of zero cycles on processor resource "
+ << PR.Name << "\n";
+ WithColor::note() << "found in scheduling class " << SCDesc.Name
+ << " (write index #" << I << ")\n";
+#endif
+ continue;
+ }
+
+ uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx];
+ if (PR.BufferSize < 0) {
+ AllInOrderResources = false;
+ } else {
+ Buffers.setBit(PRE->ProcResourceIdx);
+ AnyDispatchHazards |= (PR.BufferSize == 0);
+ AllInOrderResources &= (PR.BufferSize <= 1);
+ }
+
+ CycleSegment RCy(0, PRE->Cycles, false);
+ Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy)));
+ if (PR.SuperIdx) {
+ uint64_t Super = ProcResourceMasks[PR.SuperIdx];
+ SuperResources[Super] += PRE->Cycles;
+ }
+ }
+
+ ID.MustIssueImmediately = AllInOrderResources && AnyDispatchHazards;
+
+ // Sort elements by mask popcount, so that we prioritize resource units over
+ // resource groups, and smaller groups over larger groups.
+ sort(Worklist, [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) {
+ unsigned popcntA = countPopulation(A.first);
+ unsigned popcntB = countPopulation(B.first);
+ if (popcntA < popcntB)
+ return true;
+ if (popcntA > popcntB)
+ return false;
+ return A.first < B.first;
+ });
+
+ uint64_t UsedResourceUnits = 0;
+ uint64_t UsedResourceGroups = 0;
+
+ // Remove cycles contributed by smaller resources.
+ for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
+ ResourcePlusCycles &A = Worklist[I];
+ if (!A.second.size()) {
+ assert(countPopulation(A.first) > 1 && "Expected a group!");
+ UsedResourceGroups |= PowerOf2Floor(A.first);
+ continue;
+ }
+
+ ID.Resources.emplace_back(A);
+ uint64_t NormalizedMask = A.first;
+ if (countPopulation(A.first) == 1) {
+ UsedResourceUnits |= A.first;
+ } else {
+ // Remove the leading 1 from the resource group mask.
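+      // For example (hypothetical masks): with HWPort0=0x1 and HWPort1=0x2,
+      // group HWPort01 could be encoded as 0x4|0x3=0x7; clearing the leading
+      // bit leaves 0x3, i.e. the mask of the units contained in the group.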
+ NormalizedMask ^= PowerOf2Floor(NormalizedMask);
+ UsedResourceGroups |= (A.first ^ NormalizedMask);
+ }
+
+ for (unsigned J = I + 1; J < E; ++J) {
+ ResourcePlusCycles &B = Worklist[J];
+ if ((NormalizedMask & B.first) == NormalizedMask) {
+ B.second.CS.subtract(A.second.size() - SuperResources[A.first]);
+ if (countPopulation(B.first) > 1)
+ B.second.NumUnits++;
+ }
+ }
+ }
+
+ ID.UsedProcResUnits = UsedResourceUnits;
+ ID.UsedProcResGroups = UsedResourceGroups;
+
+ // A SchedWrite may specify a number of cycles in which a resource group
+ // is reserved. For example (on target x86; cpu Haswell):
+ //
+ // SchedWriteRes<[HWPort0, HWPort1, HWPort01]> {
+ // let ResourceCycles = [2, 2, 3];
+ // }
+ //
+ // This means:
+ // Resource units HWPort0 and HWPort1 are both used for 2cy.
+ // Resource group HWPort01 is the union of HWPort0 and HWPort1.
+ // Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01
+ // will not be usable for 2 entire cycles from instruction issue.
+ //
+ // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency
+ // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an
+ // extra delay on top of the 2 cycles latency.
+ // During those extra cycles, HWPort01 is not usable by other instructions.
+ for (ResourcePlusCycles &RPC : ID.Resources) {
+ if (countPopulation(RPC.first) > 1 && !RPC.second.isReserved()) {
+ // Remove the leading 1 from the resource group mask.
+ uint64_t Mask = RPC.first ^ PowerOf2Floor(RPC.first);
+ if ((Mask & UsedResourceUnits) == Mask)
+ RPC.second.setReserved();
+ }
+ }
+
+ // Identify extra buffers that are consumed through super resources.
+ for (const std::pair<uint64_t, unsigned> &SR : SuperResources) {
+ for (unsigned I = 1, E = NumProcResources; I < E; ++I) {
+ const MCProcResourceDesc &PR = *SM.getProcResource(I);
+ if (PR.BufferSize == -1)
+ continue;
+
+ uint64_t Mask = ProcResourceMasks[I];
+ if (Mask != SR.first && ((Mask & SR.first) == SR.first))
+ Buffers.setBit(I);
+ }
+ }
+
+ // Now set the buffers.
+ if (unsigned NumBuffers = Buffers.countPopulation()) {
+ ID.Buffers.resize(NumBuffers);
+ for (unsigned I = 0, E = NumProcResources; I < E && NumBuffers; ++I) {
+ if (Buffers[I]) {
+ --NumBuffers;
+ ID.Buffers[NumBuffers] = ProcResourceMasks[I];
+ }
+ }
+ }
+
+ LLVM_DEBUG({
+ for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
+ dbgs() << "\t\tResource Mask=" << format_hex(R.first, 16) << ", "
+ << "Reserved=" << R.second.isReserved() << ", "
+ << "#Units=" << R.second.NumUnits << ", "
+ << "cy=" << R.second.size() << '\n';
+ for (const uint64_t R : ID.Buffers)
+ dbgs() << "\t\tBuffer Mask=" << format_hex(R, 16) << '\n';
+ dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n';
+ dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16)
+ << '\n';
+ });
+}
+
+static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc,
+ const MCSchedClassDesc &SCDesc,
+ const MCSubtargetInfo &STI) {
+ if (MCDesc.isCall()) {
+ // We cannot estimate how long this call will take.
+ // Artificially set an arbitrarily high latency (100cy).
+ ID.MaxLatency = 100U;
+ return;
+ }
+
+ int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
+ // If latency is unknown, then conservatively assume a MaxLatency of 100cy.
+ ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(Latency);
+}
+
+static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) {
+ // Count register definitions, and skip non register operands in the process.
+ unsigned I, E;
+ unsigned NumExplicitDefs = MCDesc.getNumDefs();
+ for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) {
+ const MCOperand &Op = MCI.getOperand(I);
+ if (Op.isReg())
+ --NumExplicitDefs;
+ }
+
+ if (NumExplicitDefs) {
+ return make_error<InstructionError<MCInst>>(
+ "Expected more register operand definitions.", MCI);
+ }
+
+ if (MCDesc.hasOptionalDef()) {
+ // Always assume that the optional definition is the last operand.
+ const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1);
+ if (I == MCI.getNumOperands() || !Op.isReg()) {
+ std::string Message =
+ "expected a register operand for an optional definition. Instruction "
+ "has not been correctly analyzed.";
+ return make_error<InstructionError<MCInst>>(Message, MCI);
+ }
+ }
+
+ return ErrorSuccess();
+}
+
+void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
+ unsigned SchedClassID) {
+ const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
+ const MCSchedModel &SM = STI.getSchedModel();
+ const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
+
+ // Assumptions made by this algorithm:
+ // 1. The number of explicit and implicit register definitions in a MCInst
+ // matches the number of explicit and implicit definitions according to
+ // the opcode descriptor (MCInstrDesc).
+ // 2. Uses start at index #(MCDesc.getNumDefs()).
+  //  3. There can only be a single optional register definition, and it is
+ // always the last operand of the sequence (excluding extra operands
+ // contributed by variadic opcodes).
+ //
+ // These assumptions work quite well for most out-of-order in-tree targets
+  // like x86. This is mainly because the vast majority of instructions are
+  // expanded to MCInst using a straightforward lowering logic that preserves
+ // the ordering of the operands.
+ //
+ // About assumption 1.
+ // The algorithm allows non-register operands between register operand
+ // definitions. This helps to handle some special ARM instructions with
+ // implicit operand increment (-mtriple=armv7):
+ //
+ // vld1.32 {d18, d19}, [r1]! @ <MCInst #1463 VLD1q32wb_fixed
+ // @ <MCOperand Reg:59>
+ // @ <MCOperand Imm:0> (!!)
+ // @ <MCOperand Reg:67>
+ // @ <MCOperand Imm:0>
+ // @ <MCOperand Imm:14>
+ // @ <MCOperand Reg:0>>
+ //
+ // MCDesc reports:
+ // 6 explicit operands.
+ // 1 optional definition
+ // 2 explicit definitions (!!)
+ //
+ // The presence of an 'Imm' operand between the two register definitions
+ // breaks the assumption that "register definitions are always at the
+ // beginning of the operand sequence".
+ //
+  // To work around this issue, this algorithm ignores (i.e. skips) any
+ // non-register operands between register definitions. The optional
+ // definition is still at index #(NumOperands-1).
+ //
+ // According to assumption 2. register reads start at #(NumExplicitDefs-1).
+ // That means, register R1 from the example is both read and written.
+ unsigned NumExplicitDefs = MCDesc.getNumDefs();
+ unsigned NumImplicitDefs = MCDesc.getNumImplicitDefs();
+ unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;
+ unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
+ if (MCDesc.hasOptionalDef())
+ TotalDefs++;
+
+ unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
+ ID.Writes.resize(TotalDefs + NumVariadicOps);
+ // Iterate over the operands list, and skip non-register operands.
+  // The first NumExplicitDefs register operands are expected to be register
+ // definitions.
+ unsigned CurrentDef = 0;
+ unsigned i = 0;
+ for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) {
+ const MCOperand &Op = MCI.getOperand(i);
+ if (!Op.isReg())
+ continue;
+
+ WriteDescriptor &Write = ID.Writes[CurrentDef];
+ Write.OpIndex = i;
+ if (CurrentDef < NumWriteLatencyEntries) {
+ const MCWriteLatencyEntry &WLE =
+ *STI.getWriteLatencyEntry(&SCDesc, CurrentDef);
+ // Conservatively default to MaxLatency.
+ Write.Latency =
+ WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
+ Write.SClassOrWriteResourceID = WLE.WriteResourceID;
+ } else {
+ // Assign a default latency for this write.
+ Write.Latency = ID.MaxLatency;
+ Write.SClassOrWriteResourceID = 0;
+ }
+ Write.IsOptionalDef = false;
+ LLVM_DEBUG({
+ dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
+ << ", Latency=" << Write.Latency
+ << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
+ });
+ CurrentDef++;
+ }
+
+ assert(CurrentDef == NumExplicitDefs &&
+ "Expected more register operand definitions.");
+ for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
+ unsigned Index = NumExplicitDefs + CurrentDef;
+ WriteDescriptor &Write = ID.Writes[Index];
+ Write.OpIndex = ~CurrentDef;
+ Write.RegisterID = MCDesc.getImplicitDefs()[CurrentDef];
+ if (Index < NumWriteLatencyEntries) {
+ const MCWriteLatencyEntry &WLE =
+ *STI.getWriteLatencyEntry(&SCDesc, Index);
+ // Conservatively default to MaxLatency.
+ Write.Latency =
+ WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
+ Write.SClassOrWriteResourceID = WLE.WriteResourceID;
+ } else {
+ // Assign a default latency for this write.
+ Write.Latency = ID.MaxLatency;
+ Write.SClassOrWriteResourceID = 0;
+ }
+
+ Write.IsOptionalDef = false;
+ assert(Write.RegisterID != 0 && "Expected a valid phys register!");
+ LLVM_DEBUG({
+ dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex
+ << ", PhysReg=" << MRI.getName(Write.RegisterID)
+ << ", Latency=" << Write.Latency
+ << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
+ });
+ }
+
+ if (MCDesc.hasOptionalDef()) {
+ WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs];
+ Write.OpIndex = MCDesc.getNumOperands() - 1;
+ // Assign a default latency for this write.
+ Write.Latency = ID.MaxLatency;
+ Write.SClassOrWriteResourceID = 0;
+ Write.IsOptionalDef = true;
+ LLVM_DEBUG({
+ dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex
+ << ", Latency=" << Write.Latency
+ << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
+ });
+ }
+
+ if (!NumVariadicOps)
+ return;
+
+  // FIXME: if an instruction opcode is flagged 'mayStore', and it has no
+  // "unmodeledSideEffects", then this logic optimistically assumes that any
+  // extra register operands in the variadic sequence are not register
+  // definitions.
+ //
+ // Otherwise, we conservatively assume that any register operand from the
+ // variadic sequence is both a register read and a register write.
+ bool AssumeUsesOnly = MCDesc.mayStore() && !MCDesc.mayLoad() &&
+ !MCDesc.hasUnmodeledSideEffects();
+ CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef();
+ for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
+ I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) {
+ const MCOperand &Op = MCI.getOperand(OpIndex);
+ if (!Op.isReg())
+ continue;
+
+ WriteDescriptor &Write = ID.Writes[CurrentDef];
+ Write.OpIndex = OpIndex;
+ // Assign a default latency for this write.
+ Write.Latency = ID.MaxLatency;
+ Write.SClassOrWriteResourceID = 0;
+ Write.IsOptionalDef = false;
+ ++CurrentDef;
+ LLVM_DEBUG({
+ dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex
+ << ", Latency=" << Write.Latency
+ << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
+ });
+ }
+
+ ID.Writes.resize(CurrentDef);
+}
+
+void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
+ unsigned SchedClassID) {
+ const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
+ unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs();
+ unsigned NumImplicitUses = MCDesc.getNumImplicitUses();
+ // Remove the optional definition.
+ if (MCDesc.hasOptionalDef())
+ --NumExplicitUses;
+ unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
+ unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps;
+ ID.Reads.resize(TotalUses);
+ unsigned CurrentUse = 0;
+ for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses;
+ ++I, ++OpIndex) {
+ const MCOperand &Op = MCI.getOperand(OpIndex);
+ if (!Op.isReg())
+ continue;
+
+ ReadDescriptor &Read = ID.Reads[CurrentUse];
+ Read.OpIndex = OpIndex;
+ Read.UseIndex = I;
+ Read.SchedClassID = SchedClassID;
+ ++CurrentUse;
+ LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex
+ << ", UseIndex=" << Read.UseIndex << '\n');
+ }
+
+ // For the purpose of ReadAdvance, implicit uses come directly after explicit
+ // uses. The "UseIndex" must be updated according to that implicit layout.
+ for (unsigned I = 0; I < NumImplicitUses; ++I) {
+ ReadDescriptor &Read = ID.Reads[CurrentUse + I];
+ Read.OpIndex = ~I;
+ Read.UseIndex = NumExplicitUses + I;
+ Read.RegisterID = MCDesc.getImplicitUses()[I];
+ Read.SchedClassID = SchedClassID;
+ LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
+ << ", UseIndex=" << Read.UseIndex << ", RegisterID="
+ << MRI.getName(Read.RegisterID) << '\n');
+ }
+
+ CurrentUse += NumImplicitUses;
+
+  // FIXME: If an instruction opcode is marked as 'mayLoad', and it has no
+  // "unmodeledSideEffects", then this logic optimistically assumes that any
+  // extra register operands in the variadic sequence are register
+  // definitions, and not register reads.
+
+ bool AssumeDefsOnly = !MCDesc.mayStore() && MCDesc.mayLoad() &&
+ !MCDesc.hasUnmodeledSideEffects();
+ for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
+ I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) {
+ const MCOperand &Op = MCI.getOperand(OpIndex);
+ if (!Op.isReg())
+ continue;
+
+ ReadDescriptor &Read = ID.Reads[CurrentUse];
+ Read.OpIndex = OpIndex;
+ Read.UseIndex = NumExplicitUses + NumImplicitUses + I;
+ Read.SchedClassID = SchedClassID;
+ ++CurrentUse;
+ LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex
+ << ", UseIndex=" << Read.UseIndex << '\n');
+ }
+
+ ID.Reads.resize(CurrentUse);
+}
+
+Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID,
+ const MCInst &MCI) const {
+ if (ID.NumMicroOps != 0)
+ return ErrorSuccess();
+
+ bool UsesMemory = ID.MayLoad || ID.MayStore;
+ bool UsesBuffers = !ID.Buffers.empty();
+ bool UsesResources = !ID.Resources.empty();
+ if (!UsesMemory && !UsesBuffers && !UsesResources)
+ return ErrorSuccess();
+
+ StringRef Message;
+ if (UsesMemory) {
+ Message = "found an inconsistent instruction that decodes "
+ "into zero opcodes and that consumes load/store "
+ "unit resources.";
+ } else {
+ Message = "found an inconsistent instruction that decodes "
+ "to zero opcodes and that consumes scheduler "
+ "resources.";
+ }
+
+ return make_error<InstructionError<MCInst>>(Message, MCI);
+}
+
+Expected<const InstrDesc &>
+InstrBuilder::createInstrDescImpl(const MCInst &MCI) {
+ assert(STI.getSchedModel().hasInstrSchedModel() &&
+ "Itineraries are not yet supported!");
+
+ // Obtain the instruction descriptor from the opcode.
+ unsigned short Opcode = MCI.getOpcode();
+ const MCInstrDesc &MCDesc = MCII.get(Opcode);
+ const MCSchedModel &SM = STI.getSchedModel();
+
+ // Then obtain the scheduling class information from the instruction.
+ unsigned SchedClassID = MCDesc.getSchedClass();
+ bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant();
+
+ // Try to solve variant scheduling classes.
+ if (IsVariant) {
+ unsigned CPUID = SM.getProcessorID();
+ while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
+ SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &MCI, CPUID);
+
+ if (!SchedClassID) {
+ return make_error<InstructionError<MCInst>>(
+ "unable to resolve scheduling class for write variant.", MCI);
+ }
+ }
+
+ // Check if this instruction is supported. Otherwise, report an error.
+ const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
+ if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) {
+ return make_error<InstructionError<MCInst>>(
+ "found an unsupported instruction in the input assembly sequence.",
+ MCI);
+ }
+
+ LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n');
+ LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n');
+
+ // Create a new empty descriptor.
+ std::unique_ptr<InstrDesc> ID = llvm::make_unique<InstrDesc>();
+ ID->NumMicroOps = SCDesc.NumMicroOps;
+ ID->SchedClassID = SchedClassID;
+
+ if (MCDesc.isCall() && FirstCallInst) {
+ // We don't correctly model calls.
+ WithColor::warning() << "found a call in the input assembly sequence.\n";
+ WithColor::note() << "call instructions are not correctly modeled. "
+ << "Assume a latency of 100cy.\n";
+ FirstCallInst = false;
+ }
+
+ if (MCDesc.isReturn() && FirstReturnInst) {
+ WithColor::warning() << "found a return instruction in the input"
+ << " assembly sequence.\n";
+ WithColor::note() << "program counter updates are ignored.\n";
+ FirstReturnInst = false;
+ }
+
+ ID->MayLoad = MCDesc.mayLoad();
+ ID->MayStore = MCDesc.mayStore();
+ ID->HasSideEffects = MCDesc.hasUnmodeledSideEffects();
+ ID->BeginGroup = SCDesc.BeginGroup;
+ ID->EndGroup = SCDesc.EndGroup;
+
+ initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
+ computeMaxLatency(*ID, MCDesc, SCDesc, STI);
+
+ if (Error Err = verifyOperands(MCDesc, MCI))
+ return std::move(Err);
+
+ populateWrites(*ID, MCI, SchedClassID);
+ populateReads(*ID, MCI, SchedClassID);
+
+ LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
+ LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');
+
+ // Sanity check on the instruction descriptor.
+ if (Error Err = verifyInstrDesc(*ID, MCI))
+ return std::move(Err);
+
+ // Now add the new descriptor.
+ bool IsVariadic = MCDesc.isVariadic();
+ if (!IsVariadic && !IsVariant) {
+ Descriptors[MCI.getOpcode()] = std::move(ID);
+ return *Descriptors[MCI.getOpcode()];
+ }
+
+ VariantDescriptors[&MCI] = std::move(ID);
+ return *VariantDescriptors[&MCI];
+}
+
+Expected<const InstrDesc &>
+InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI) {
+ if (Descriptors.find_as(MCI.getOpcode()) != Descriptors.end())
+ return *Descriptors[MCI.getOpcode()];
+
+ if (VariantDescriptors.find(&MCI) != VariantDescriptors.end())
+ return *VariantDescriptors[&MCI];
+
+ return createInstrDescImpl(MCI);
+}
+
+Expected<std::unique_ptr<Instruction>>
+InstrBuilder::createInstruction(const MCInst &MCI) {
+ Expected<const InstrDesc &> DescOrErr = getOrCreateInstrDesc(MCI);
+ if (!DescOrErr)
+ return DescOrErr.takeError();
+ const InstrDesc &D = *DescOrErr;
+ std::unique_ptr<Instruction> NewIS = llvm::make_unique<Instruction>(D);
+
+ // Check if this is a dependency breaking instruction.
+ APInt Mask;
+
+ bool IsZeroIdiom = false;
+ bool IsDepBreaking = false;
+ if (MCIA) {
+ unsigned ProcID = STI.getSchedModel().getProcessorID();
+ IsZeroIdiom = MCIA->isZeroIdiom(MCI, Mask, ProcID);
+ IsDepBreaking =
+ IsZeroIdiom || MCIA->isDependencyBreaking(MCI, Mask, ProcID);
+ if (MCIA->isOptimizableRegisterMove(MCI, ProcID))
+ NewIS->setOptimizableMove();
+ }
+
+ // Initialize Reads first.
+ for (const ReadDescriptor &RD : D.Reads) {
+ int RegID = -1;
+ if (!RD.isImplicitRead()) {
+ // explicit read.
+ const MCOperand &Op = MCI.getOperand(RD.OpIndex);
+ // Skip non-register operands.
+ if (!Op.isReg())
+ continue;
+ RegID = Op.getReg();
+ } else {
+ // Implicit read.
+ RegID = RD.RegisterID;
+ }
+
+ // Skip invalid register operands.
+ if (!RegID)
+ continue;
+
+ // Okay, this is a register operand. Create a ReadState for it.
+ assert(RegID > 0 && "Invalid register ID found!");
+ NewIS->getUses().emplace_back(RD, RegID);
+ ReadState &RS = NewIS->getUses().back();
+
+ if (IsDepBreaking) {
+ // A mask of all zeroes means: explicit input operands are not
+ // independent.
+ if (Mask.isNullValue()) {
+ if (!RD.isImplicitRead())
+ RS.setIndependentFromDef();
+ } else {
+ // Check if this register operand is independent according to `Mask`.
+ // Note that Mask may not have enough bits to describe all explicit and
+ // implicit input operands. If this register operand doesn't have a
+ // corresponding bit in Mask, then conservatively assume that it is
+ // dependent.
+ if (Mask.getBitWidth() > RD.UseIndex) {
+          // Okay. This mask describes register use `RD.UseIndex`.
+ if (Mask[RD.UseIndex])
+ RS.setIndependentFromDef();
+ }
+ }
+ }
+ }
+
+ // Early exit if there are no writes.
+ if (D.Writes.empty())
+ return std::move(NewIS);
+
+ // Track register writes that implicitly clear the upper portion of the
+ // underlying super-registers using an APInt.
+ APInt WriteMask(D.Writes.size(), 0);
+
+ // Now query the MCInstrAnalysis object to obtain information about which
+ // register writes implicitly clear the upper portion of a super-register.
+ if (MCIA)
+ MCIA->clearsSuperRegisters(MRI, MCI, WriteMask);
+
+ // Initialize writes.
+ unsigned WriteIndex = 0;
+ for (const WriteDescriptor &WD : D.Writes) {
+ unsigned RegID = WD.isImplicitWrite() ? WD.RegisterID
+ : MCI.getOperand(WD.OpIndex).getReg();
+    // Check if this is an optional definition that references NoReg.
+ if (WD.IsOptionalDef && !RegID) {
+ ++WriteIndex;
+ continue;
+ }
+
+ assert(RegID && "Expected a valid register ID!");
+ NewIS->getDefs().emplace_back(WD, RegID,
+ /* ClearsSuperRegs */ WriteMask[WriteIndex],
+ /* WritesZero */ IsZeroIdiom);
+ ++WriteIndex;
+ }
+
+ return std::move(NewIS);
+}
+} // namespace mca
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/MCA/Instruction.cpp b/contrib/llvm-project/llvm/lib/MCA/Instruction.cpp
new file mode 100644
index 000000000000..001842bca318
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/MCA/Instruction.cpp
@@ -0,0 +1,254 @@
+//===--------------------- Instruction.cpp ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines abstractions used by the Pipeline to model register reads,
+// register writes and instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/Instruction.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace mca {
+
+void WriteState::writeStartEvent(unsigned IID, unsigned RegID,
+ unsigned Cycles) {
+ CRD.IID = IID;
+ CRD.RegID = RegID;
+ CRD.Cycles = Cycles;
+ DependentWriteCyclesLeft = Cycles;
+ DependentWrite = nullptr;
+}
+
+void ReadState::writeStartEvent(unsigned IID, unsigned RegID, unsigned Cycles) {
+ assert(DependentWrites);
+ assert(CyclesLeft == UNKNOWN_CYCLES);
+
+ // This read may be dependent on more than one write. This typically occurs
+ // when a definition is the result of multiple writes where at least one
+ // write does a partial register update.
+  // The HW is forced to do some extra bookkeeping to keep track of all the
+ // dependent writes, and implement a merging scheme for the partial writes.
+ --DependentWrites;
+ if (TotalCycles < Cycles) {
+ CRD.IID = IID;
+ CRD.RegID = RegID;
+ CRD.Cycles = Cycles;
+ TotalCycles = Cycles;
+ }
+
+ if (!DependentWrites) {
+ CyclesLeft = TotalCycles;
+ IsReady = !CyclesLeft;
+ }
+}
+
+void WriteState::onInstructionIssued(unsigned IID) {
+ assert(CyclesLeft == UNKNOWN_CYCLES);
+ // Update the number of cycles left based on the WriteDescriptor info.
+ CyclesLeft = getLatency();
+
+ // Now that the time left before write-back is known, notify
+ // all the users.
+ for (const std::pair<ReadState *, int> &User : Users) {
+ ReadState *RS = User.first;
+ unsigned ReadCycles = std::max(0, CyclesLeft - User.second);
+ RS->writeStartEvent(IID, RegisterID, ReadCycles);
+ }
+
+ // Notify any writes that are in a false dependency with this write.
+ if (PartialWrite)
+ PartialWrite->writeStartEvent(IID, RegisterID, CyclesLeft);
+}
+
+void WriteState::addUser(unsigned IID, ReadState *User, int ReadAdvance) {
+ // If CyclesLeft is different than -1, then we don't need to
+ // update the list of users. We can just notify the user with
+ // the actual number of cycles left (which may be zero).
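+  // For example, a write with CyclesLeft=3 and a user with ReadAdvance=2 is
+  // notified of a residual latency of max(0, 3 - 2) = 1 cycle.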
+ if (CyclesLeft != UNKNOWN_CYCLES) {
+ unsigned ReadCycles = std::max(0, CyclesLeft - ReadAdvance);
+ User->writeStartEvent(IID, RegisterID, ReadCycles);
+ return;
+ }
+
+ Users.emplace_back(User, ReadAdvance);
+}
+
+void WriteState::addUser(unsigned IID, WriteState *User) {
+ if (CyclesLeft != UNKNOWN_CYCLES) {
+ User->writeStartEvent(IID, RegisterID, std::max(0, CyclesLeft));
+ return;
+ }
+
+ assert(!PartialWrite && "PartialWrite already set!");
+ PartialWrite = User;
+ User->setDependentWrite(this);
+}
+
+void WriteState::cycleEvent() {
+ // Note: CyclesLeft can be a negative number. It is an error to
+ // make it an unsigned quantity because users of this write may
+ // specify a negative ReadAdvance.
+ if (CyclesLeft != UNKNOWN_CYCLES)
+ CyclesLeft--;
+
+ if (DependentWriteCyclesLeft)
+ DependentWriteCyclesLeft--;
+}
+
+void ReadState::cycleEvent() {
+ // Update the total number of cycles.
+ if (DependentWrites && TotalCycles) {
+ --TotalCycles;
+ return;
+ }
+
+ // Bail out immediately if we don't know how many cycles are left.
+ if (CyclesLeft == UNKNOWN_CYCLES)
+ return;
+
+ if (CyclesLeft) {
+ --CyclesLeft;
+ IsReady = !CyclesLeft;
+ }
+}
+
+#ifndef NDEBUG
+void WriteState::dump() const {
+ dbgs() << "{ OpIdx=" << WD->OpIndex << ", Lat=" << getLatency() << ", RegID "
+ << getRegisterID() << ", Cycles Left=" << getCyclesLeft() << " }";
+}
+
+void WriteRef::dump() const {
+ dbgs() << "IID=" << getSourceIndex() << ' ';
+ if (isValid())
+ getWriteState()->dump();
+ else
+ dbgs() << "(null)";
+}
+#endif
+
+const CriticalDependency &Instruction::computeCriticalRegDep() {
+ if (CriticalRegDep.Cycles)
+ return CriticalRegDep;
+
+  // Keep track of the largest cycle count seen so far, so that the critical
+  // register dependency is the one with the maximum latency contribution.
+  unsigned MaxLatency = 0;
+  for (const WriteState &WS : getDefs()) {
+    const CriticalDependency &WriteCRD = WS.getCriticalRegDep();
+    if (WriteCRD.Cycles > MaxLatency) {
+      CriticalRegDep = WriteCRD;
+      MaxLatency = WriteCRD.Cycles;
+    }
+  }
+
+  for (const ReadState &RS : getUses()) {
+    const CriticalDependency &ReadCRD = RS.getCriticalRegDep();
+    if (ReadCRD.Cycles > MaxLatency) {
+      CriticalRegDep = ReadCRD;
+      MaxLatency = ReadCRD.Cycles;
+    }
+  }
+
+ return CriticalRegDep;
+}
+
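+// An instruction progresses through the following stages during the
+// simulation: IS_DISPATCHED -> IS_PENDING -> IS_READY -> IS_EXECUTING ->
+// IS_EXECUTED. A zero-latency instruction is marked as executed in the same
+// cycle it is issued (see method 'execute()' below).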
+void Instruction::dispatch(unsigned RCUToken) {
+ assert(Stage == IS_INVALID);
+ Stage = IS_DISPATCHED;
+ RCUTokenID = RCUToken;
+
+ // Check if input operands are already available.
+ if (updateDispatched())
+ updatePending();
+}
+
+void Instruction::execute(unsigned IID) {
+ assert(Stage == IS_READY);
+ Stage = IS_EXECUTING;
+
+ // Set the cycles left before the write-back stage.
+ CyclesLeft = getLatency();
+
+ for (WriteState &WS : getDefs())
+ WS.onInstructionIssued(IID);
+
+ // Transition to the "executed" stage if this is a zero-latency instruction.
+ if (!CyclesLeft)
+ Stage = IS_EXECUTED;
+}
+
+void Instruction::forceExecuted() {
+ assert(Stage == IS_READY && "Invalid internal state!");
+ CyclesLeft = 0;
+ Stage = IS_EXECUTED;
+}
+
+bool Instruction::updatePending() {
+ assert(isPending() && "Unexpected instruction stage found!");
+
+ if (!all_of(getUses(), [](const ReadState &Use) { return Use.isReady(); }))
+ return false;
+
+ // A partial register write cannot complete before a dependent write.
+ if (!all_of(getDefs(), [](const WriteState &Def) { return Def.isReady(); }))
+ return false;
+
+ Stage = IS_READY;
+ return true;
+}
+
+bool Instruction::updateDispatched() {
+ assert(isDispatched() && "Unexpected instruction stage found!");
+
+ if (!all_of(getUses(), [](const ReadState &Use) {
+ return Use.isPending() || Use.isReady();
+ }))
+ return false;
+
+ // A partial register write cannot complete before a dependent write.
+ if (!all_of(getDefs(),
+ [](const WriteState &Def) { return !Def.getDependentWrite(); }))
+ return false;
+
+ Stage = IS_PENDING;
+ return true;
+}
+
+void Instruction::update() {
+ if (isDispatched())
+ updateDispatched();
+ if (isPending())
+ updatePending();
+}
+
+void Instruction::cycleEvent() {
+ if (isReady())
+ return;
+
+ if (isDispatched() || isPending()) {
+ for (ReadState &Use : getUses())
+ Use.cycleEvent();
+
+ for (WriteState &Def : getDefs())
+ Def.cycleEvent();
+
+ update();
+ return;
+ }
+
+ assert(isExecuting() && "Instruction not in-flight?");
+ assert(CyclesLeft && "Instruction already executed?");
+ for (WriteState &Def : getDefs())
+ Def.cycleEvent();
+ CyclesLeft--;
+ if (!CyclesLeft)
+ Stage = IS_EXECUTED;
+}
+
+const unsigned WriteRef::INVALID_IID = std::numeric_limits<unsigned>::max();
+
+} // namespace mca
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/MCA/Pipeline.cpp b/contrib/llvm-project/llvm/lib/MCA/Pipeline.cpp
new file mode 100644
index 000000000000..22b9d0799f77
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/MCA/Pipeline.cpp
@@ -0,0 +1,97 @@
+//===--------------------- Pipeline.cpp -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements an ordered container of stages that simulate the
+/// pipeline of a hardware backend.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/Pipeline.h"
+#include "llvm/MCA/HWEventListener.h"
+#include "llvm/Support/Debug.h"
+
+namespace llvm {
+namespace mca {
+
+#define DEBUG_TYPE "llvm-mca"
+
+void Pipeline::addEventListener(HWEventListener *Listener) {
+ if (Listener)
+ Listeners.insert(Listener);
+ for (auto &S : Stages)
+ S->addListener(Listener);
+}
+
+bool Pipeline::hasWorkToProcess() {
+ return any_of(Stages, [](const std::unique_ptr<Stage> &S) {
+ return S->hasWorkToComplete();
+ });
+}
+
+Expected<unsigned> Pipeline::run() {
+ assert(!Stages.empty() && "Unexpected empty pipeline found!");
+
+ do {
+ notifyCycleBegin();
+ if (Error Err = runCycle())
+ return std::move(Err);
+ notifyCycleEnd();
+ ++Cycles;
+ } while (hasWorkToProcess());
+
+ return Cycles;
+}
+
+Error Pipeline::runCycle() {
+ Error Err = ErrorSuccess();
+ // Update stages before we start processing new instructions.
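+  // Stages are visited in reverse order so that each consumer stage can make
+  // progress (and free its resources) before its producer stage tries to push
+  // new instructions towards it.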
+ for (auto I = Stages.rbegin(), E = Stages.rend(); I != E && !Err; ++I) {
+ const std::unique_ptr<Stage> &S = *I;
+ Err = S->cycleStart();
+ }
+
+ // Now fetch and execute new instructions.
+ InstRef IR;
+ Stage &FirstStage = *Stages[0];
+ while (!Err && FirstStage.isAvailable(IR))
+ Err = FirstStage.execute(IR);
+
+ // Update stages in preparation for a new cycle.
+ for (const std::unique_ptr<Stage> &S : Stages) {
+ Err = S->cycleEnd();
+ if (Err)
+ break;
+ }
+
+ return Err;
+}
+
+void Pipeline::appendStage(std::unique_ptr<Stage> S) {
+ assert(S && "Invalid null stage in input!");
+ if (!Stages.empty()) {
+ Stage *Last = Stages.back().get();
+ Last->setNextInSequence(S.get());
+ }
+
+ Stages.push_back(std::move(S));
+}
+
+void Pipeline::notifyCycleBegin() {
+ LLVM_DEBUG(dbgs() << "\n[E] Cycle begin: " << Cycles << '\n');
+ for (HWEventListener *Listener : Listeners)
+ Listener->onCycleBegin();
+}
+
+void Pipeline::notifyCycleEnd() {
+ LLVM_DEBUG(dbgs() << "[E] Cycle end: " << Cycles << "\n");
+ for (HWEventListener *Listener : Listeners)
+ Listener->onCycleEnd();
+}
+} // namespace mca.
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/MCA/Stages/DispatchStage.cpp b/contrib/llvm-project/llvm/lib/MCA/Stages/DispatchStage.cpp
new file mode 100644
index 000000000000..7334a268e9a6
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/MCA/Stages/DispatchStage.cpp
@@ -0,0 +1,184 @@
+//===--------------------- DispatchStage.cpp --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file models the dispatch component of an instruction pipeline.
+///
+/// The DispatchStage is responsible for updating instruction dependencies
+/// and communicating to the simulated instruction scheduler that an instruction
+/// is ready to be scheduled for execution.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/Stages/DispatchStage.h"
+#include "llvm/MCA/HWEventListener.h"
+#include "llvm/MCA/HardwareUnits/Scheduler.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "llvm-mca"
+
+namespace llvm {
+namespace mca {
+
+DispatchStage::DispatchStage(const MCSubtargetInfo &Subtarget,
+ const MCRegisterInfo &MRI,
+ unsigned MaxDispatchWidth, RetireControlUnit &R,
+ RegisterFile &F)
+ : DispatchWidth(MaxDispatchWidth), AvailableEntries(MaxDispatchWidth),
+ CarryOver(0U), CarriedOver(), STI(Subtarget), RCU(R), PRF(F) {
+ if (!DispatchWidth)
+ DispatchWidth = Subtarget.getSchedModel().IssueWidth;
+}
+
+void DispatchStage::notifyInstructionDispatched(const InstRef &IR,
+ ArrayRef<unsigned> UsedRegs,
+ unsigned UOps) const {
+ LLVM_DEBUG(dbgs() << "[E] Instruction Dispatched: #" << IR << '\n');
+ notifyEvent<HWInstructionEvent>(
+ HWInstructionDispatchedEvent(IR, UsedRegs, UOps));
+}
+
+bool DispatchStage::checkPRF(const InstRef &IR) const {
+ SmallVector<unsigned, 4> RegDefs;
+ for (const WriteState &RegDef : IR.getInstruction()->getDefs())
+ RegDefs.emplace_back(RegDef.getRegisterID());
+
+ const unsigned RegisterMask = PRF.isAvailable(RegDefs);
+ // A mask with all zeroes means: register files are available.
+ if (RegisterMask) {
+ notifyEvent<HWStallEvent>(
+ HWStallEvent(HWStallEvent::RegisterFileStall, IR));
+ return false;
+ }
+
+ return true;
+}
+
+bool DispatchStage::checkRCU(const InstRef &IR) const {
+ const unsigned NumMicroOps = IR.getInstruction()->getDesc().NumMicroOps;
+ if (RCU.isAvailable(NumMicroOps))
+ return true;
+ notifyEvent<HWStallEvent>(
+ HWStallEvent(HWStallEvent::RetireControlUnitStall, IR));
+ return false;
+}
+
+bool DispatchStage::canDispatch(const InstRef &IR) const {
+ bool CanDispatch = checkRCU(IR);
+ CanDispatch &= checkPRF(IR);
+ CanDispatch &= checkNextStage(IR);
+ return CanDispatch;
+}
+
+Error DispatchStage::dispatch(InstRef IR) {
+ assert(!CarryOver && "Cannot dispatch another instruction!");
+ Instruction &IS = *IR.getInstruction();
+ const InstrDesc &Desc = IS.getDesc();
+ const unsigned NumMicroOps = Desc.NumMicroOps;
+ if (NumMicroOps > DispatchWidth) {
+ assert(AvailableEntries == DispatchWidth);
+ AvailableEntries = 0;
+ CarryOver = NumMicroOps - DispatchWidth;
+ CarriedOver = IR;
+ } else {
+ assert(AvailableEntries >= NumMicroOps);
+ AvailableEntries -= NumMicroOps;
+ }
+
+  // Check if this instruction ends the dispatch group.
+ if (Desc.EndGroup)
+ AvailableEntries = 0;
+
+ // Check if this is an optimizable reg-reg move.
+ if (IS.isOptimizableMove()) {
+    assert(IS.getDefs().size() == 1 && "Expected a single register def!");
+    assert(IS.getUses().size() == 1 && "Expected a single register use!");
+ if (PRF.tryEliminateMove(IS.getDefs()[0], IS.getUses()[0]))
+ IS.setEliminated();
+ }
+
+ // A dependency-breaking instruction doesn't have to wait on the register
+ // input operands, and it is often optimized at register renaming stage.
+ // Update RAW dependencies if this instruction is not a dependency-breaking
+ // instruction. A dependency-breaking instruction is a zero-latency
+ // instruction that doesn't consume hardware resources.
+ // An example of dependency-breaking instruction on X86 is a zero-idiom XOR.
+ //
+ // We also don't update data dependencies for instructions that have been
+ // eliminated at register renaming stage.
+ if (!IS.isEliminated()) {
+ for (ReadState &RS : IS.getUses())
+ PRF.addRegisterRead(RS, STI);
+ }
+
+ // By default, a dependency-breaking zero-idiom is expected to be optimized
+ // at register renaming stage. That means, no physical register is allocated
+ // to the instruction.
+ SmallVector<unsigned, 4> RegisterFiles(PRF.getNumRegisterFiles());
+ for (WriteState &WS : IS.getDefs())
+ PRF.addRegisterWrite(WriteRef(IR.getSourceIndex(), &WS), RegisterFiles);
+
+ // Reserve slots in the RCU, and notify the instruction that it has been
+ // dispatched to the schedulers for execution.
+ IS.dispatch(RCU.reserveSlot(IR, NumMicroOps));
+
+ // Notify listeners of the "instruction dispatched" event,
+ // and move IR to the next stage.
+ notifyInstructionDispatched(IR, RegisterFiles,
+ std::min(DispatchWidth, NumMicroOps));
+ return moveToTheNextStage(IR);
+}
+
+Error DispatchStage::cycleStart() {
+ PRF.cycleStart();
+
+ if (!CarryOver) {
+ AvailableEntries = DispatchWidth;
+ return ErrorSuccess();
+ }
+
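+  // Example with hypothetical numbers: with DispatchWidth=4, an instruction
+  // made of 9 micro opcodes dispatches 4 opcodes on its first cycle, then
+  // carries over 4 and finally 1 opcode in the two cycles that follow; only
+  // that last cycle leaves dispatch entries available for other instructions.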
+ AvailableEntries = CarryOver >= DispatchWidth ? 0 : DispatchWidth - CarryOver;
+ unsigned DispatchedOpcodes = DispatchWidth - AvailableEntries;
+ CarryOver -= DispatchedOpcodes;
+ assert(CarriedOver && "Invalid dispatched instruction");
+
+ SmallVector<unsigned, 8> RegisterFiles(PRF.getNumRegisterFiles(), 0U);
+ notifyInstructionDispatched(CarriedOver, RegisterFiles, DispatchedOpcodes);
+ if (!CarryOver)
+ CarriedOver = InstRef();
+ return ErrorSuccess();
+}
+
+bool DispatchStage::isAvailable(const InstRef &IR) const {
+ const InstrDesc &Desc = IR.getInstruction()->getDesc();
+ unsigned Required = std::min(Desc.NumMicroOps, DispatchWidth);
+ if (Required > AvailableEntries)
+ return false;
+
+ if (Desc.BeginGroup && AvailableEntries != DispatchWidth)
+ return false;
+
+ // The dispatch logic doesn't internally buffer instructions. It only accepts
+ // instructions that can be successfully moved to the next stage during this
+ // same cycle.
+ return canDispatch(IR);
+}
+
+Error DispatchStage::execute(InstRef &IR) {
+ assert(canDispatch(IR) && "Cannot dispatch another instruction!");
+ return dispatch(IR);
+}
+
+#ifndef NDEBUG
+void DispatchStage::dump() const {
+ PRF.dump();
+ RCU.dump();
+}
+#endif
+} // namespace mca
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/MCA/Stages/EntryStage.cpp b/contrib/llvm-project/llvm/lib/MCA/Stages/EntryStage.cpp
new file mode 100644
index 000000000000..d2f5613a0fb6
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/MCA/Stages/EntryStage.cpp
@@ -0,0 +1,77 @@
+//===---------------------- EntryStage.cpp ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the Fetch stage of an instruction pipeline. Its sole
+/// purpose in life is to produce instructions for the rest of the pipeline.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/Stages/EntryStage.h"
+#include "llvm/MCA/Instruction.h"
+
+namespace llvm {
+namespace mca {
+
+bool EntryStage::hasWorkToComplete() const {
+ return static_cast<bool>(CurrentInstruction);
+}
+
+bool EntryStage::isAvailable(const InstRef & /* unused */) const {
+ if (CurrentInstruction)
+ return checkNextStage(CurrentInstruction);
+ return false;
+}
+
+void EntryStage::getNextInstruction() {
+ assert(!CurrentInstruction && "There is already an instruction to process!");
+ if (!SM.hasNext())
+ return;
+ SourceRef SR = SM.peekNext();
+ std::unique_ptr<Instruction> Inst = llvm::make_unique<Instruction>(SR.second);
+ CurrentInstruction = InstRef(SR.first, Inst.get());
+ Instructions.emplace_back(std::move(Inst));
+ SM.updateNext();
+}
+
+llvm::Error EntryStage::execute(InstRef & /*unused */) {
+ assert(CurrentInstruction && "There is no instruction to process!");
+ if (llvm::Error Val = moveToTheNextStage(CurrentInstruction))
+ return Val;
+
+ // Move the program counter.
+ CurrentInstruction.invalidate();
+ getNextInstruction();
+ return llvm::ErrorSuccess();
+}
+
+llvm::Error EntryStage::cycleStart() {
+ if (!CurrentInstruction)
+ getNextInstruction();
+ return llvm::ErrorSuccess();
+}
+
+llvm::Error EntryStage::cycleEnd() {
+ // Find the first instruction which hasn't been retired.
+ auto Range = make_range(&Instructions[NumRetired], Instructions.end());
+ auto It = find_if(Range, [](const std::unique_ptr<Instruction> &I) {
+ return !I->isRetired();
+ });
+
+ NumRetired = std::distance(Instructions.begin(), It);
+ // Erase instructions up to the first that hasn't been retired.
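+  // Only compact the buffer once at least half of its entries have been
+  // retired; this amortizes the cost of erasing from the front of the vector.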
+ if ((NumRetired * 2) >= Instructions.size()) {
+ Instructions.erase(Instructions.begin(), It);
+ NumRetired = 0;
+ }
+
+ return llvm::ErrorSuccess();
+}
+
+} // namespace mca
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/MCA/Stages/ExecuteStage.cpp b/contrib/llvm-project/llvm/lib/MCA/Stages/ExecuteStage.cpp
new file mode 100644
index 000000000000..a2b361fcd1bf
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/MCA/Stages/ExecuteStage.cpp
@@ -0,0 +1,290 @@
+//===---------------------- ExecuteStage.cpp --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the execution stage of an instruction pipeline.
+///
+/// The ExecuteStage is responsible for managing the hardware scheduler
+/// and issuing notifications that an instruction has been executed.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/Stages/ExecuteStage.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "llvm-mca"
+
+namespace llvm {
+namespace mca {
+
+HWStallEvent::GenericEventType toHWStallEventType(Scheduler::Status Status) {
+ switch (Status) {
+ case Scheduler::SC_LOAD_QUEUE_FULL:
+ return HWStallEvent::LoadQueueFull;
+ case Scheduler::SC_STORE_QUEUE_FULL:
+ return HWStallEvent::StoreQueueFull;
+ case Scheduler::SC_BUFFERS_FULL:
+ return HWStallEvent::SchedulerQueueFull;
+ case Scheduler::SC_DISPATCH_GROUP_STALL:
+ return HWStallEvent::DispatchGroupStall;
+ case Scheduler::SC_AVAILABLE:
+ return HWStallEvent::Invalid;
+ }
+
+ llvm_unreachable("Don't know how to process this StallKind!");
+}
+
+bool ExecuteStage::isAvailable(const InstRef &IR) const {
+ if (Scheduler::Status S = HWS.isAvailable(IR)) {
+ HWStallEvent::GenericEventType ET = toHWStallEventType(S);
+ notifyEvent<HWStallEvent>(HWStallEvent(ET, IR));
+ return false;
+ }
+
+ return true;
+}
+
+Error ExecuteStage::issueInstruction(InstRef &IR) {
+ SmallVector<std::pair<ResourceRef, ResourceCycles>, 4> Used;
+ SmallVector<InstRef, 4> Pending;
+ SmallVector<InstRef, 4> Ready;
+
+ HWS.issueInstruction(IR, Used, Pending, Ready);
+ NumIssuedOpcodes += IR.getInstruction()->getDesc().NumMicroOps;
+
+ notifyReservedOrReleasedBuffers(IR, /* Reserved */ false);
+
+ notifyInstructionIssued(IR, Used);
+ if (IR.getInstruction()->isExecuted()) {
+ notifyInstructionExecuted(IR);
+ // FIXME: add a buffer of executed instructions.
+ if (Error S = moveToTheNextStage(IR))
+ return S;
+ }
+
+ for (const InstRef &I : Pending)
+ notifyInstructionPending(I);
+
+ for (const InstRef &I : Ready)
+ notifyInstructionReady(I);
+ return ErrorSuccess();
+}
+
+Error ExecuteStage::issueReadyInstructions() {
+ InstRef IR = HWS.select();
+ while (IR) {
+ if (Error Err = issueInstruction(IR))
+ return Err;
+
+ // Select the next instruction to issue.
+ IR = HWS.select();
+ }
+
+ return ErrorSuccess();
+}
+
+Error ExecuteStage::cycleStart() {
+ SmallVector<ResourceRef, 8> Freed;
+ SmallVector<InstRef, 4> Executed;
+ SmallVector<InstRef, 4> Pending;
+ SmallVector<InstRef, 4> Ready;
+
+ HWS.cycleEvent(Freed, Executed, Pending, Ready);
+ NumDispatchedOpcodes = 0;
+ NumIssuedOpcodes = 0;
+
+ for (const ResourceRef &RR : Freed)
+ notifyResourceAvailable(RR);
+
+ for (InstRef &IR : Executed) {
+ notifyInstructionExecuted(IR);
+ // FIXME: add a buffer of executed instructions.
+ if (Error S = moveToTheNextStage(IR))
+ return S;
+ }
+
+ for (const InstRef &IR : Pending)
+ notifyInstructionPending(IR);
+
+ for (const InstRef &IR : Ready)
+ notifyInstructionReady(IR);
+
+ return issueReadyInstructions();
+}
+
+Error ExecuteStage::cycleEnd() {
+ if (!EnablePressureEvents)
+ return ErrorSuccess();
+
+ // Always conservatively report any backpressure events if the dispatch logic
+ // was stalled due to unavailable scheduler resources.
+ if (!HWS.hadTokenStall() && NumDispatchedOpcodes <= NumIssuedOpcodes)
+ return ErrorSuccess();
+
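+  // At this point, either the dispatch logic stalled on a scheduler token, or
+  // this cycle dispatched more micro opcodes than it issued. Analyze which
+  // resources and dependencies are limiting the throughput, and notify the
+  // listeners.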
+ SmallVector<InstRef, 8> Insts;
+ uint64_t Mask = HWS.analyzeResourcePressure(Insts);
+ if (Mask) {
+ LLVM_DEBUG(dbgs() << "[E] Backpressure increased because of unavailable "
+ "pipeline resources: "
+ << format_hex(Mask, 16) << '\n');
+ HWPressureEvent Ev(HWPressureEvent::RESOURCES, Insts, Mask);
+ notifyEvent(Ev);
+ }
+
+ SmallVector<InstRef, 8> RegDeps;
+ SmallVector<InstRef, 8> MemDeps;
+ HWS.analyzeDataDependencies(RegDeps, MemDeps);
+ if (RegDeps.size()) {
+ LLVM_DEBUG(
+ dbgs() << "[E] Backpressure increased by register dependencies\n");
+ HWPressureEvent Ev(HWPressureEvent::REGISTER_DEPS, RegDeps);
+ notifyEvent(Ev);
+ }
+
+ if (MemDeps.size()) {
+ LLVM_DEBUG(dbgs() << "[E] Backpressure increased by memory dependencies\n");
+ HWPressureEvent Ev(HWPressureEvent::MEMORY_DEPS, MemDeps);
+ notifyEvent(Ev);
+ }
+
+ return ErrorSuccess();
+}
+
+#ifndef NDEBUG
+static void verifyInstructionEliminated(const InstRef &IR) {
+ const Instruction &Inst = *IR.getInstruction();
+ assert(Inst.isEliminated() && "Instruction was not eliminated!");
+ assert(Inst.isReady() && "Instruction in an inconsistent state!");
+
+ // Ensure that instructions eliminated at register renaming stage are in a
+ // consistent state.
+ const InstrDesc &Desc = Inst.getDesc();
+ assert(!Desc.MayLoad && !Desc.MayStore && "Cannot eliminate a memory op!");
+}
+#endif
+
+Error ExecuteStage::handleInstructionEliminated(InstRef &IR) {
+#ifndef NDEBUG
+ verifyInstructionEliminated(IR);
+#endif
+ notifyInstructionPending(IR);
+ notifyInstructionReady(IR);
+ notifyInstructionIssued(IR, {});
+ IR.getInstruction()->forceExecuted();
+ notifyInstructionExecuted(IR);
+ return moveToTheNextStage(IR);
+}
+
+// Schedule the instruction for execution on the hardware.
+Error ExecuteStage::execute(InstRef &IR) {
+ assert(isAvailable(IR) && "Scheduler is not available!");
+
+#ifndef NDEBUG
+ // Ensure that the HWS has not stored this instruction in its queues.
+ HWS.sanityCheck(IR);
+#endif
+
+ if (IR.getInstruction()->isEliminated())
+ return handleInstructionEliminated(IR);
+
+ // Reserve a slot in each buffered resource. Also, mark units with
+ // BufferSize=0 as reserved. Resources with a buffer size of zero will only
+  // be released after the instruction is issued, and all the ResourceCycles
+  // for those units have been consumed.
+ bool IsReadyInstruction = HWS.dispatch(IR);
+ const Instruction &Inst = *IR.getInstruction();
+ NumDispatchedOpcodes += Inst.getDesc().NumMicroOps;
+ notifyReservedOrReleasedBuffers(IR, /* Reserved */ true);
+
+ if (!IsReadyInstruction) {
+ if (Inst.isPending())
+ notifyInstructionPending(IR);
+ return ErrorSuccess();
+ }
+
+ notifyInstructionPending(IR);
+
+ // If we did not return early, then the scheduler is ready for execution.
+ notifyInstructionReady(IR);
+
+ // If we cannot issue immediately, the HWS will add IR to its ready queue for
+ // execution later, so we must return early here.
+ if (!HWS.mustIssueImmediately(IR))
+ return ErrorSuccess();
+
+ // Issue IR to the underlying pipelines.
+ return issueInstruction(IR);
+}
+
+void ExecuteStage::notifyInstructionExecuted(const InstRef &IR) const {
+ LLVM_DEBUG(dbgs() << "[E] Instruction Executed: #" << IR << '\n');
+ notifyEvent<HWInstructionEvent>(
+ HWInstructionEvent(HWInstructionEvent::Executed, IR));
+}
+
+void ExecuteStage::notifyInstructionPending(const InstRef &IR) const {
+ LLVM_DEBUG(dbgs() << "[E] Instruction Pending: #" << IR << '\n');
+ notifyEvent<HWInstructionEvent>(
+ HWInstructionEvent(HWInstructionEvent::Pending, IR));
+}
+
+void ExecuteStage::notifyInstructionReady(const InstRef &IR) const {
+ LLVM_DEBUG(dbgs() << "[E] Instruction Ready: #" << IR << '\n');
+ notifyEvent<HWInstructionEvent>(
+ HWInstructionEvent(HWInstructionEvent::Ready, IR));
+}
+
+void ExecuteStage::notifyResourceAvailable(const ResourceRef &RR) const {
+ LLVM_DEBUG(dbgs() << "[E] Resource Available: [" << RR.first << '.'
+ << RR.second << "]\n");
+ for (HWEventListener *Listener : getListeners())
+ Listener->onResourceAvailable(RR);
+}
+
+void ExecuteStage::notifyInstructionIssued(
+ const InstRef &IR,
+ MutableArrayRef<std::pair<ResourceRef, ResourceCycles>> Used) const {
+ LLVM_DEBUG({
+ dbgs() << "[E] Instruction Issued: #" << IR << '\n';
+ for (const std::pair<ResourceRef, ResourceCycles> &Resource : Used) {
+ assert(Resource.second.getDenominator() == 1 && "Invalid cycles!");
+ dbgs() << "[E] Resource Used: [" << Resource.first.first << '.'
+ << Resource.first.second << "], ";
+ dbgs() << "cycles: " << Resource.second.getNumerator() << '\n';
+ }
+ });
+
+ // Replace resource masks with valid resource processor IDs.
+ for (std::pair<ResourceRef, ResourceCycles> &Use : Used)
+ Use.first.first = HWS.getResourceID(Use.first.first);
+
+ notifyEvent<HWInstructionEvent>(HWInstructionIssuedEvent(IR, Used));
+}
+
+void ExecuteStage::notifyReservedOrReleasedBuffers(const InstRef &IR,
+ bool Reserved) const {
+ const InstrDesc &Desc = IR.getInstruction()->getDesc();
+ if (Desc.Buffers.empty())
+ return;
+
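+ // Desc.Buffers stores resource masks; convert them into processor resource
+ // IDs before notifying the listeners.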
+ SmallVector<unsigned, 4> BufferIDs(Desc.Buffers.begin(), Desc.Buffers.end());
+ std::transform(Desc.Buffers.begin(), Desc.Buffers.end(), BufferIDs.begin(),
+ [&](uint64_t Op) { return HWS.getResourceID(Op); });
+ if (Reserved) {
+ for (HWEventListener *Listener : getListeners())
+ Listener->onReservedBuffers(IR, BufferIDs);
+ return;
+ }
+
+ for (HWEventListener *Listener : getListeners())
+ Listener->onReleasedBuffers(IR, BufferIDs);
+}
+
+} // namespace mca
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/MCA/Stages/InstructionTables.cpp b/contrib/llvm-project/llvm/lib/MCA/Stages/InstructionTables.cpp
new file mode 100644
index 000000000000..adeefb45ec2d
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/MCA/Stages/InstructionTables.cpp
@@ -0,0 +1,68 @@
+//===--------------------- InstructionTables.cpp ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the method InstructionTables::execute().
+/// Method execute() computes a theoretical resource pressure distribution
+/// based on the information available in the scheduling model, without
+/// running the pipeline, and notifies it to all the registered listeners.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/Stages/InstructionTables.h"
+
+namespace llvm {
+namespace mca {
+
+Error InstructionTables::execute(InstRef &IR) {
+ const InstrDesc &Desc = IR.getInstruction()->getDesc();
+ UsedResources.clear();
+
+ // Identify the resources consumed by this instruction.
+ for (const std::pair<uint64_t, ResourceUsage> Resource : Desc.Resources) {
+ // Skip zero-cycle resources (i.e., unused resources).
+ if (!Resource.second.size())
+ continue;
+ unsigned Cycles = Resource.second.size();
+ unsigned Index = std::distance(
+ Masks.begin(), std::find(Masks.begin(), Masks.end(), Resource.first));
+ const MCProcResourceDesc &ProcResource = *SM.getProcResource(Index);
+ unsigned NumUnits = ProcResource.NumUnits;
+ if (!ProcResource.SubUnitsIdxBegin) {
+ // Distribute Cycles uniformly across the units: each of the NumUnits units
+ // consumes Cycles/NumUnits cycles.
+ for (unsigned I = 0, E = NumUnits; I < E; ++I) {
+ ResourceRef ResourceUnit = std::make_pair(Index, 1U << I);
+ UsedResources.emplace_back(
+ std::make_pair(ResourceUnit, ResourceCycles(Cycles, NumUnits)));
+ }
+ continue;
+ }
+
+ // This is a group. Obtain the set of resources contained in this
+ // group. Some of these resources may implement multiple units.
+ // Uniformly distribute Cycles across all of the units.
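+ // For example, a group made of two single-unit resources that is consumed
+ // for 2 cycles charges 2/2 = 1 cycle to each of the two underlying units.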
+ for (unsigned I1 = 0; I1 < NumUnits; ++I1) {
+ unsigned SubUnitIdx = ProcResource.SubUnitsIdxBegin[I1];
+ const MCProcResourceDesc &SubUnit = *SM.getProcResource(SubUnitIdx);
+ // Compute the number of cycles consumed by each resource unit.
+ for (unsigned I2 = 0, E2 = SubUnit.NumUnits; I2 < E2; ++I2) {
+ ResourceRef ResourceUnit = std::make_pair(SubUnitIdx, 1U << I2);
+ UsedResources.emplace_back(std::make_pair(
+ ResourceUnit, ResourceCycles(Cycles, NumUnits * SubUnit.NumUnits)));
+ }
+ }
+ }
+
+ // Send a fake instruction issued event to all the registered listeners.
+ HWInstructionIssuedEvent Event(IR, UsedResources);
+ notifyEvent<HWInstructionIssuedEvent>(Event);
+ return ErrorSuccess();
+}
+
+} // namespace mca
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/MCA/Stages/MicroOpQueueStage.cpp b/contrib/llvm-project/llvm/lib/MCA/Stages/MicroOpQueueStage.cpp
new file mode 100644
index 000000000000..cb3e4c6979a4
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/MCA/Stages/MicroOpQueueStage.cpp
@@ -0,0 +1,70 @@
+//===---------------------- MicroOpQueueStage.cpp ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the MicroOpQueueStage.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/Stages/MicroOpQueueStage.h"
+
+namespace llvm {
+namespace mca {
+
+#define DEBUG_TYPE "llvm-mca"
+
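+// The queue is modeled as a fixed-size circular buffer: micro-ops are queued
+// at NextAvailableSlotIdx and drained from CurrentInstructionSlotIdx. An
+// instruction reserves getNormalizedOpcodes(IR) entries, although the InstRef
+// itself is only stored in the first of them.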
+Error MicroOpQueueStage::moveInstructions() {
+ InstRef IR = Buffer[CurrentInstructionSlotIdx];
+ while (IR && checkNextStage(IR)) {
+ if (llvm::Error Val = moveToTheNextStage(IR))
+ return Val;
+
+ Buffer[CurrentInstructionSlotIdx].invalidate();
+ unsigned NormalizedOpcodes = getNormalizedOpcodes(IR);
+ CurrentInstructionSlotIdx += NormalizedOpcodes;
+ CurrentInstructionSlotIdx %= Buffer.size();
+ AvailableEntries += NormalizedOpcodes;
+ IR = Buffer[CurrentInstructionSlotIdx];
+ }
+
+ return llvm::ErrorSuccess();
+}
+
+MicroOpQueueStage::MicroOpQueueStage(unsigned Size, unsigned IPC,
+ bool ZeroLatencyStage)
+ : NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0), MaxIPC(IPC),
+ CurrentIPC(0), IsZeroLatencyStage(ZeroLatencyStage) {
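+ // A Size of zero is normalized to a single-entry buffer.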
+ Buffer.resize(Size ? Size : 1);
+ AvailableEntries = Buffer.size();
+}
+
+Error MicroOpQueueStage::execute(InstRef &IR) {
+ Buffer[NextAvailableSlotIdx] = IR;
+ unsigned NormalizedOpcodes = getNormalizedOpcodes(IR);
+ NextAvailableSlotIdx += NormalizedOpcodes;
+ NextAvailableSlotIdx %= Buffer.size();
+ AvailableEntries -= NormalizedOpcodes;
+ ++CurrentIPC;
+ return llvm::ErrorSuccess();
+}
+
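+// A zero-latency queue drains at the end of the same cycle in which micro-ops
+// are queued; otherwise, queued micro-ops can only move to the next stage at
+// the start of the following cycle.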
+Error MicroOpQueueStage::cycleStart() {
+ CurrentIPC = 0;
+ if (!IsZeroLatencyStage)
+ return moveInstructions();
+ return llvm::ErrorSuccess();
+}
+
+Error MicroOpQueueStage::cycleEnd() {
+ if (IsZeroLatencyStage)
+ return moveInstructions();
+ return llvm::ErrorSuccess();
+}
+
+} // namespace mca
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/MCA/Stages/RetireStage.cpp b/contrib/llvm-project/llvm/lib/MCA/Stages/RetireStage.cpp
new file mode 100644
index 000000000000..e1789dd7fa2a
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/MCA/Stages/RetireStage.cpp
@@ -0,0 +1,61 @@
+//===---------------------- RetireStage.cpp ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the retire stage of an instruction pipeline.
+/// The RetireStage represents the process logic that interacts with the
+/// simulated RetireControlUnit hardware.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/Stages/RetireStage.h"
+#include "llvm/MCA/HWEventListener.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "llvm-mca"
+
+namespace llvm {
+namespace mca {
+
+llvm::Error RetireStage::cycleStart() {
+ if (RCU.isEmpty())
+ return llvm::ErrorSuccess();
+
+ const unsigned MaxRetirePerCycle = RCU.getMaxRetirePerCycle();
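+ // A MaxRetirePerCycle of zero means there is no limit on how many
+ // instructions can retire in a single cycle.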
+ unsigned NumRetired = 0;
+ while (!RCU.isEmpty()) {
+ if (MaxRetirePerCycle != 0 && NumRetired == MaxRetirePerCycle)
+ break;
+ const RetireControlUnit::RUToken &Current = RCU.peekCurrentToken();
+ if (!Current.Executed)
+ break;
+ RCU.consumeCurrentToken();
+ notifyInstructionRetired(Current.IR);
+ NumRetired++;
+ }
+
+ return llvm::ErrorSuccess();
+}
+
+llvm::Error RetireStage::execute(InstRef &IR) {
+ RCU.onInstructionExecuted(IR.getInstruction()->getRCUTokenID());
+ return llvm::ErrorSuccess();
+}
+
+void RetireStage::notifyInstructionRetired(const InstRef &IR) const {
+ LLVM_DEBUG(llvm::dbgs() << "[E] Instruction Retired: #" << IR << '\n');
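+ // FreedRegs collects, for each register file, how many physical registers
+ // are freed by removing this instruction's writes; the retired event below
+ // forwards those counts to the listeners.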
+ llvm::SmallVector<unsigned, 4> FreedRegs(PRF.getNumRegisterFiles());
+ const Instruction &Inst = *IR.getInstruction();
+
+ for (const WriteState &WS : Inst.getDefs())
+ PRF.removeRegisterWrite(WS, FreedRegs);
+ notifyEvent<HWInstructionEvent>(HWInstructionRetiredEvent(IR, FreedRegs));
+}
+
+} // namespace mca
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/MCA/Stages/Stage.cpp b/contrib/llvm-project/llvm/lib/MCA/Stages/Stage.cpp
new file mode 100644
index 000000000000..ed512ac9711c
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/MCA/Stages/Stage.cpp
@@ -0,0 +1,28 @@
+//===---------------------- Stage.cpp ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines a stage.
+/// A chain of stages composes an instruction pipeline.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/Stages/Stage.h"
+
+namespace llvm {
+namespace mca {
+
+// Pin the vtable here in the implementation file.
+Stage::~Stage() = default;
+
+void Stage::addListener(HWEventListener *Listener) {
+ Listeners.insert(Listener);
+}
+
+} // namespace mca
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/MCA/Support.cpp b/contrib/llvm-project/llvm/lib/MCA/Support.cpp
new file mode 100644
index 000000000000..ce1f0f6f211b
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/MCA/Support.cpp
@@ -0,0 +1,110 @@
+//===--------------------- Support.cpp --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements a few helper functions used by various pipeline
+/// components.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/Support.h"
+#include "llvm/MC/MCSchedule.h"
+
+namespace llvm {
+namespace mca {
+
+#define DEBUG_TYPE "llvm-mca"
+
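+// ResourceCycles models a rational number of cycles (Numerator/Denominator).
+// Adding two values with different denominators rescales both to the least
+// common multiple of the denominators; for example, 1/2 + 1/3 becomes
+// 3/6 + 2/6 = 5/6.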
+ResourceCycles &ResourceCycles::operator+=(const ResourceCycles &RHS) {
+ if (Denominator == RHS.Denominator)
+ Numerator += RHS.Numerator;
+ else {
+ // Create a common denominator for LHS and RHS by calculating the least
+ // common multiple from the GCD.
+ unsigned GCD = GreatestCommonDivisor64(Denominator, RHS.Denominator);
+ unsigned LCM = (Denominator * RHS.Denominator) / GCD;
+ unsigned LHSNumerator = Numerator * (LCM / Denominator);
+ unsigned RHSNumerator = RHS.Numerator * (LCM / RHS.Denominator);
+ Numerator = LHSNumerator + RHSNumerator;
+ Denominator = LCM;
+ }
+ return *this;
+}
+
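+// As an illustration, for a hypothetical model with two units P0 and P1 plus
+// a group P01 containing both, the loops below assign P0 = 0b001, P1 = 0b010
+// and P01 = 0b100 | P0 | P1 = 0b111: every unit and group gets its own bit,
+// and group masks also cover the bits of their sub-units.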
+void computeProcResourceMasks(const MCSchedModel &SM,
+ MutableArrayRef<uint64_t> Masks) {
+ unsigned ProcResourceID = 0;
+
+ assert(Masks.size() == SM.getNumProcResourceKinds() &&
+ "Invalid number of elements");
+ // Resource at index 0 is the 'InvalidUnit'. Set an invalid mask for it.
+ Masks[0] = 0;
+
+ // Create a unique bitmask for every processor resource unit.
+ for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ const MCProcResourceDesc &Desc = *SM.getProcResource(I);
+ if (Desc.SubUnitsIdxBegin)
+ continue;
+ Masks[I] = 1ULL << ProcResourceID;
+ ProcResourceID++;
+ }
+
+ // Create a unique bitmask for every processor resource group.
+ for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ const MCProcResourceDesc &Desc = *SM.getProcResource(I);
+ if (!Desc.SubUnitsIdxBegin)
+ continue;
+ Masks[I] = 1ULL << ProcResourceID;
+ for (unsigned U = 0; U < Desc.NumUnits; ++U) {
+ uint64_t OtherMask = Masks[Desc.SubUnitsIdxBegin[U]];
+ Masks[I] |= OtherMask;
+ }
+ ProcResourceID++;
+ }
+
+#ifndef NDEBUG
+ LLVM_DEBUG(dbgs() << "\nProcessor resource masks:"
+ << "\n");
+ for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ const MCProcResourceDesc &Desc = *SM.getProcResource(I);
+ LLVM_DEBUG(dbgs() << '[' << format_decimal(I, 2) << "] " << " - "
+ << format_hex(Masks[I], 16) << " - "
+ << Desc.Name << '\n');
+ }
+#endif
+}
+
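+// As an illustration: with DispatchWidth = 2, NumMicroOps = 4, and a single
+// resource unit that is busy for 3 cycles, the dispatch bound is 4 / 2 = 2.0
+// and the resource bound is 3 / 1 = 3.0, so the returned rthroughput is 3.0
+// cycles per block iteration.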
+double computeBlockRThroughput(const MCSchedModel &SM, unsigned DispatchWidth,
+ unsigned NumMicroOps,
+ ArrayRef<unsigned> ProcResourceUsage) {
+ // The block throughput is bounded from above by the hardware dispatch
+ // throughput. That is because the DispatchWidth is an upper bound on the
+ // number of opcodes that can be part of a single dispatch group.
+ double Max = static_cast<double>(NumMicroOps) / DispatchWidth;
+
+ // The block throughput is also limited by the amount of hardware parallelism.
+ // The number of available resource units affects the resource pressure
+ // distribution, as well as how many blocks can be executed every cycle.
+ for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ unsigned ResourceCycles = ProcResourceUsage[I];
+ if (!ResourceCycles)
+ continue;
+
+ const MCProcResourceDesc &MCDesc = *SM.getProcResource(I);
+ double Throughput = static_cast<double>(ResourceCycles) / MCDesc.NumUnits;
+ Max = std::max(Max, Throughput);
+ }
+
+ // The block reciprocal throughput is computed as the MAX of:
+ // - (NumMicroOps / DispatchWidth)
+ // - (ResourceCycles / NumUnits) for every consumed processor resource.
+ return Max;
+}
+
+} // namespace mca
+} // namespace llvm