diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2019-12-20 19:53:05 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2019-12-20 19:53:05 +0000 |
commit | 0b57cec536236d46e3dba9bd041533462f33dbb7 (patch) | |
tree | 56229dbdbbf76d18580f72f789003db17246c8d9 /contrib/llvm-project/llvm/lib/MCA | |
parent | 718ef55ec7785aae63f98f8ca05dc07ed399c16d (diff) | |
download | src-0b57cec536236d46e3dba9bd041533462f33dbb7.tar.gz src-0b57cec536236d46e3dba9bd041533462f33dbb7.zip |
Move all sources from the llvm project into contrib/llvm-project.
This uses the new layout of the upstream repository, which was recently
migrated to GitHub, and converted into a "monorepo". That is, most of
the earlier separate sub-projects with their own branches and tags were
consolidated into one top-level directory, and are now branched and
tagged together.
Updating the vendor area to match this layout is next.
Notes
Notes:
svn path=/head/; revision=355940
Diffstat (limited to 'contrib/llvm-project/llvm/lib/MCA')
19 files changed, 3560 insertions, 0 deletions
diff --git a/contrib/llvm-project/llvm/lib/MCA/Context.cpp b/contrib/llvm-project/llvm/lib/MCA/Context.cpp new file mode 100644 index 000000000000..f0e8dfab8680 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/MCA/Context.cpp @@ -0,0 +1,69 @@ +//===---------------------------- Context.cpp -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines a class for holding ownership of various simulated +/// hardware units. A Context also provides a utility routine for constructing +/// a default out-of-order pipeline with fetch, dispatch, execute, and retire +/// stages. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/Context.h" +#include "llvm/MCA/HardwareUnits/RegisterFile.h" +#include "llvm/MCA/HardwareUnits/RetireControlUnit.h" +#include "llvm/MCA/HardwareUnits/Scheduler.h" +#include "llvm/MCA/Stages/DispatchStage.h" +#include "llvm/MCA/Stages/EntryStage.h" +#include "llvm/MCA/Stages/ExecuteStage.h" +#include "llvm/MCA/Stages/MicroOpQueueStage.h" +#include "llvm/MCA/Stages/RetireStage.h" + +namespace llvm { +namespace mca { + +std::unique_ptr<Pipeline> +Context::createDefaultPipeline(const PipelineOptions &Opts, InstrBuilder &IB, + SourceMgr &SrcMgr) { + const MCSchedModel &SM = STI.getSchedModel(); + + // Create the hardware units defining the backend. + auto RCU = llvm::make_unique<RetireControlUnit>(SM); + auto PRF = llvm::make_unique<RegisterFile>(SM, MRI, Opts.RegisterFileSize); + auto LSU = llvm::make_unique<LSUnit>(SM, Opts.LoadQueueSize, + Opts.StoreQueueSize, Opts.AssumeNoAlias); + auto HWS = llvm::make_unique<Scheduler>(SM, *LSU); + + // Create the pipeline stages. + auto Fetch = llvm::make_unique<EntryStage>(SrcMgr); + auto Dispatch = llvm::make_unique<DispatchStage>(STI, MRI, Opts.DispatchWidth, + *RCU, *PRF); + auto Execute = + llvm::make_unique<ExecuteStage>(*HWS, Opts.EnableBottleneckAnalysis); + auto Retire = llvm::make_unique<RetireStage>(*RCU, *PRF); + + // Pass the ownership of all the hardware units to this Context. + addHardwareUnit(std::move(RCU)); + addHardwareUnit(std::move(PRF)); + addHardwareUnit(std::move(LSU)); + addHardwareUnit(std::move(HWS)); + + // Build the pipeline. + auto StagePipeline = llvm::make_unique<Pipeline>(); + StagePipeline->appendStage(std::move(Fetch)); + if (Opts.MicroOpQueueSize) + StagePipeline->appendStage(llvm::make_unique<MicroOpQueueStage>( + Opts.MicroOpQueueSize, Opts.DecodersThroughput)); + StagePipeline->appendStage(std::move(Dispatch)); + StagePipeline->appendStage(std::move(Execute)); + StagePipeline->appendStage(std::move(Retire)); + return StagePipeline; +} + +} // namespace mca +} // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/MCA/HWEventListener.cpp b/contrib/llvm-project/llvm/lib/MCA/HWEventListener.cpp new file mode 100644 index 000000000000..58b2e0329222 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/MCA/HWEventListener.cpp @@ -0,0 +1,22 @@ +//===----------------------- HWEventListener.cpp ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines a vtable anchor for class HWEventListener. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/HWEventListener.h" + +namespace llvm { +namespace mca { + +// Anchor the vtable here. +void HWEventListener::anchor() {} +} // namespace mca +} // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/HardwareUnit.cpp b/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/HardwareUnit.cpp new file mode 100644 index 000000000000..69f793796ec7 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/HardwareUnit.cpp @@ -0,0 +1,24 @@ +//===------------------------- HardwareUnit.cpp -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines the anchor for the base class that describes +/// simulated hardware units. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/HardwareUnits/HardwareUnit.h" + +namespace llvm { +namespace mca { + +// Pin the vtable with this method. +HardwareUnit::~HardwareUnit() = default; + +} // namespace mca +} // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/LSUnit.cpp b/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/LSUnit.cpp new file mode 100644 index 000000000000..ac1a6a36547b --- /dev/null +++ b/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/LSUnit.cpp @@ -0,0 +1,206 @@ +//===----------------------- LSUnit.cpp --------------------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// A Load-Store Unit for the llvm-mca tool. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/HardwareUnits/LSUnit.h" +#include "llvm/MCA/Instruction.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "llvm-mca" + +namespace llvm { +namespace mca { + +LSUnitBase::LSUnitBase(const MCSchedModel &SM, unsigned LQ, unsigned SQ, + bool AssumeNoAlias) + : LQSize(LQ), SQSize(SQ), UsedLQEntries(0), UsedSQEntries(0), + NoAlias(AssumeNoAlias), NextGroupID(1) { + if (SM.hasExtraProcessorInfo()) { + const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo(); + if (!LQSize && EPI.LoadQueueID) { + const MCProcResourceDesc &LdQDesc = *SM.getProcResource(EPI.LoadQueueID); + LQSize = LdQDesc.BufferSize; + } + + if (!SQSize && EPI.StoreQueueID) { + const MCProcResourceDesc &StQDesc = *SM.getProcResource(EPI.StoreQueueID); + SQSize = StQDesc.BufferSize; + } + } +} + +LSUnitBase::~LSUnitBase() {} + +void LSUnitBase::cycleEvent() { + for (const std::pair<unsigned, std::unique_ptr<MemoryGroup>> &G : Groups) + G.second->cycleEvent(); +} + +#ifndef NDEBUG +void LSUnitBase::dump() const { + dbgs() << "[LSUnit] LQ_Size = " << getLoadQueueSize() << '\n'; + dbgs() << "[LSUnit] SQ_Size = " << getStoreQueueSize() << '\n'; + dbgs() << "[LSUnit] NextLQSlotIdx = " << getUsedLQEntries() << '\n'; + dbgs() << "[LSUnit] NextSQSlotIdx = " << getUsedSQEntries() << '\n'; + dbgs() << "\n"; + for (const auto &GroupIt : Groups) { + const MemoryGroup &Group = *GroupIt.second; + dbgs() << "[LSUnit] Group (" << GroupIt.first << "): " + << "[ #Preds = " << Group.getNumPredecessors() + << ", #GIssued = " << Group.getNumExecutingPredecessors() + << ", #GExecuted = " << Group.getNumExecutedPredecessors() + << ", #Inst = " << Group.getNumInstructions() + << ", #IIssued = " << Group.getNumExecuting() + << ", #IExecuted = " << Group.getNumExecuted() << '\n'; + } +} +#endif + +unsigned LSUnit::dispatch(const InstRef &IR) { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + unsigned IsMemBarrier = Desc.HasSideEffects; + assert((Desc.MayLoad || Desc.MayStore) && "Not a memory operation!"); + + if (Desc.MayLoad) + assignLQSlot(); + if (Desc.MayStore) + assignSQSlot(); + + if (Desc.MayStore) { + // Always create a new group for store operations. + + // A store may not pass a previous store or store barrier. + unsigned NewGID = createMemoryGroup(); + MemoryGroup &NewGroup = getGroup(NewGID); + NewGroup.addInstruction(); + + // A store may not pass a previous load or load barrier. + unsigned ImmediateLoadDominator = + std::max(CurrentLoadGroupID, CurrentLoadBarrierGroupID); + if (ImmediateLoadDominator) { + MemoryGroup &IDom = getGroup(ImmediateLoadDominator); + LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << ImmediateLoadDominator + << ") --> (" << NewGID << ")\n"); + IDom.addSuccessor(&NewGroup); + } + if (CurrentStoreGroupID) { + MemoryGroup &StoreGroup = getGroup(CurrentStoreGroupID); + LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << CurrentStoreGroupID + << ") --> (" << NewGID << ")\n"); + StoreGroup.addSuccessor(&NewGroup); + } + + CurrentStoreGroupID = NewGID; + if (Desc.MayLoad) { + CurrentLoadGroupID = NewGID; + if (IsMemBarrier) + CurrentLoadBarrierGroupID = NewGID; + } + + return NewGID; + } + + assert(Desc.MayLoad && "Expected a load!"); + + // Always create a new memory group if this is the first load of the sequence. + + // A load may not pass a previous store unless flag 'NoAlias' is set. + // A load may pass a previous load. + // A younger load cannot pass a older load barrier. + // A load barrier cannot pass a older load. + bool ShouldCreateANewGroup = !CurrentLoadGroupID || IsMemBarrier || + CurrentLoadGroupID <= CurrentStoreGroupID || + CurrentLoadGroupID <= CurrentLoadBarrierGroupID; + if (ShouldCreateANewGroup) { + unsigned NewGID = createMemoryGroup(); + MemoryGroup &NewGroup = getGroup(NewGID); + NewGroup.addInstruction(); + + if (!assumeNoAlias() && CurrentStoreGroupID) { + MemoryGroup &StGroup = getGroup(CurrentStoreGroupID); + LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << CurrentStoreGroupID + << ") --> (" << NewGID << ")\n"); + StGroup.addSuccessor(&NewGroup); + } + if (CurrentLoadBarrierGroupID) { + MemoryGroup &LdGroup = getGroup(CurrentLoadBarrierGroupID); + LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << CurrentLoadBarrierGroupID + << ") --> (" << NewGID << ")\n"); + LdGroup.addSuccessor(&NewGroup); + } + + CurrentLoadGroupID = NewGID; + if (IsMemBarrier) + CurrentLoadBarrierGroupID = NewGID; + return NewGID; + } + + MemoryGroup &Group = getGroup(CurrentLoadGroupID); + Group.addInstruction(); + return CurrentLoadGroupID; +} + +LSUnit::Status LSUnit::isAvailable(const InstRef &IR) const { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + if (Desc.MayLoad && isLQFull()) + return LSUnit::LSU_LQUEUE_FULL; + if (Desc.MayStore && isSQFull()) + return LSUnit::LSU_SQUEUE_FULL; + return LSUnit::LSU_AVAILABLE; +} + +void LSUnitBase::onInstructionExecuted(const InstRef &IR) { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + bool IsALoad = Desc.MayLoad; + bool IsAStore = Desc.MayStore; + assert((IsALoad || IsAStore) && "Expected a memory operation!"); + + unsigned GroupID = IR.getInstruction()->getLSUTokenID(); + auto It = Groups.find(GroupID); + It->second->onInstructionExecuted(); + if (It->second->isExecuted()) { + Groups.erase(It); + } + + if (IsALoad) { + UsedLQEntries--; + LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << IR.getSourceIndex() + << " has been removed from the load queue.\n"); + } + + if (IsAStore) { + UsedSQEntries--; + LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << IR.getSourceIndex() + << " has been removed from the store queue.\n"); + } +} + +void LSUnit::onInstructionExecuted(const InstRef &IR) { + const Instruction &IS = *IR.getInstruction(); + if (!IS.isMemOp()) + return; + + LSUnitBase::onInstructionExecuted(IR); + unsigned GroupID = IS.getLSUTokenID(); + if (!isValidGroupID(GroupID)) { + if (GroupID == CurrentLoadGroupID) + CurrentLoadGroupID = 0; + if (GroupID == CurrentStoreGroupID) + CurrentStoreGroupID = 0; + if (GroupID == CurrentLoadBarrierGroupID) + CurrentLoadBarrierGroupID = 0; + } +} + +} // namespace mca +} // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp b/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp new file mode 100644 index 000000000000..86a888ea8cae --- /dev/null +++ b/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp @@ -0,0 +1,500 @@ +//===--------------------- RegisterFile.cpp ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines a register mapping file class. This class is responsible +/// for managing hardware register files and the tracking of data dependencies +/// between registers. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/HardwareUnits/RegisterFile.h" +#include "llvm/MCA/Instruction.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "llvm-mca" + +namespace llvm { +namespace mca { + +RegisterFile::RegisterFile(const MCSchedModel &SM, const MCRegisterInfo &mri, + unsigned NumRegs) + : MRI(mri), + RegisterMappings(mri.getNumRegs(), {WriteRef(), RegisterRenamingInfo()}), + ZeroRegisters(mri.getNumRegs(), false) { + initialize(SM, NumRegs); +} + +void RegisterFile::initialize(const MCSchedModel &SM, unsigned NumRegs) { + // Create a default register file that "sees" all the machine registers + // declared by the target. The number of physical registers in the default + // register file is set equal to `NumRegs`. A value of zero for `NumRegs` + // means: this register file has an unbounded number of physical registers. + RegisterFiles.emplace_back(NumRegs); + if (!SM.hasExtraProcessorInfo()) + return; + + // For each user defined register file, allocate a RegisterMappingTracker + // object. The size of every register file, as well as the mapping between + // register files and register classes is specified via tablegen. + const MCExtraProcessorInfo &Info = SM.getExtraProcessorInfo(); + + // Skip invalid register file at index 0. + for (unsigned I = 1, E = Info.NumRegisterFiles; I < E; ++I) { + const MCRegisterFileDesc &RF = Info.RegisterFiles[I]; + assert(RF.NumPhysRegs && "Invalid PRF with zero physical registers!"); + + // The cost of a register definition is equivalent to the number of + // physical registers that are allocated at register renaming stage. + unsigned Length = RF.NumRegisterCostEntries; + const MCRegisterCostEntry *FirstElt = + &Info.RegisterCostTable[RF.RegisterCostEntryIdx]; + addRegisterFile(RF, ArrayRef<MCRegisterCostEntry>(FirstElt, Length)); + } +} + +void RegisterFile::cycleStart() { + for (RegisterMappingTracker &RMT : RegisterFiles) + RMT.NumMoveEliminated = 0; +} + +void RegisterFile::addRegisterFile(const MCRegisterFileDesc &RF, + ArrayRef<MCRegisterCostEntry> Entries) { + // A default register file is always allocated at index #0. That register file + // is mainly used to count the total number of mappings created by all + // register files at runtime. Users can limit the number of available physical + // registers in register file #0 through the command line flag + // `-register-file-size`. + unsigned RegisterFileIndex = RegisterFiles.size(); + RegisterFiles.emplace_back(RF.NumPhysRegs, RF.MaxMovesEliminatedPerCycle, + RF.AllowZeroMoveEliminationOnly); + + // Special case where there is no register class identifier in the set. + // An empty set of register classes means: this register file contains all + // the physical registers specified by the target. + // We optimistically assume that a register can be renamed at the cost of a + // single physical register. The constructor of RegisterFile ensures that + // a RegisterMapping exists for each logical register defined by the Target. + if (Entries.empty()) + return; + + // Now update the cost of individual registers. + for (const MCRegisterCostEntry &RCE : Entries) { + const MCRegisterClass &RC = MRI.getRegClass(RCE.RegisterClassID); + for (const MCPhysReg Reg : RC) { + RegisterRenamingInfo &Entry = RegisterMappings[Reg].second; + IndexPlusCostPairTy &IPC = Entry.IndexPlusCost; + if (IPC.first && IPC.first != RegisterFileIndex) { + // The only register file that is allowed to overlap is the default + // register file at index #0. The analysis is inaccurate if register + // files overlap. + errs() << "warning: register " << MRI.getName(Reg) + << " defined in multiple register files."; + } + IPC = std::make_pair(RegisterFileIndex, RCE.Cost); + Entry.RenameAs = Reg; + Entry.AllowMoveElimination = RCE.AllowMoveElimination; + + // Assume the same cost for each sub-register. + for (MCSubRegIterator I(Reg, &MRI); I.isValid(); ++I) { + RegisterRenamingInfo &OtherEntry = RegisterMappings[*I].second; + if (!OtherEntry.IndexPlusCost.first && + (!OtherEntry.RenameAs || + MRI.isSuperRegister(*I, OtherEntry.RenameAs))) { + OtherEntry.IndexPlusCost = IPC; + OtherEntry.RenameAs = Reg; + } + } + } + } +} + +void RegisterFile::allocatePhysRegs(const RegisterRenamingInfo &Entry, + MutableArrayRef<unsigned> UsedPhysRegs) { + unsigned RegisterFileIndex = Entry.IndexPlusCost.first; + unsigned Cost = Entry.IndexPlusCost.second; + if (RegisterFileIndex) { + RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex]; + RMT.NumUsedPhysRegs += Cost; + UsedPhysRegs[RegisterFileIndex] += Cost; + } + + // Now update the default register mapping tracker. + RegisterFiles[0].NumUsedPhysRegs += Cost; + UsedPhysRegs[0] += Cost; +} + +void RegisterFile::freePhysRegs(const RegisterRenamingInfo &Entry, + MutableArrayRef<unsigned> FreedPhysRegs) { + unsigned RegisterFileIndex = Entry.IndexPlusCost.first; + unsigned Cost = Entry.IndexPlusCost.second; + if (RegisterFileIndex) { + RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex]; + RMT.NumUsedPhysRegs -= Cost; + FreedPhysRegs[RegisterFileIndex] += Cost; + } + + // Now update the default register mapping tracker. + RegisterFiles[0].NumUsedPhysRegs -= Cost; + FreedPhysRegs[0] += Cost; +} + +void RegisterFile::addRegisterWrite(WriteRef Write, + MutableArrayRef<unsigned> UsedPhysRegs) { + WriteState &WS = *Write.getWriteState(); + unsigned RegID = WS.getRegisterID(); + assert(RegID && "Adding an invalid register definition?"); + + LLVM_DEBUG({ + dbgs() << "RegisterFile: addRegisterWrite [ " << Write.getSourceIndex() + << ", " << MRI.getName(RegID) << "]\n"; + }); + + // If RenameAs is equal to RegID, then RegID is subject to register renaming + // and false dependencies on RegID are all eliminated. + + // If RenameAs references the invalid register, then we optimistically assume + // that it can be renamed. In the absence of tablegen descriptors for register + // files, RenameAs is always set to the invalid register ID. In all other + // cases, RenameAs must be either equal to RegID, or it must reference a + // super-register of RegID. + + // If RenameAs is a super-register of RegID, then a write to RegID has always + // a false dependency on RenameAs. The only exception is for when the write + // implicitly clears the upper portion of the underlying register. + // If a write clears its super-registers, then it is renamed as `RenameAs`. + bool IsWriteZero = WS.isWriteZero(); + bool IsEliminated = WS.isEliminated(); + bool ShouldAllocatePhysRegs = !IsWriteZero && !IsEliminated; + const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; + WS.setPRF(RRI.IndexPlusCost.first); + + if (RRI.RenameAs && RRI.RenameAs != RegID) { + RegID = RRI.RenameAs; + WriteRef &OtherWrite = RegisterMappings[RegID].first; + + if (!WS.clearsSuperRegisters()) { + // The processor keeps the definition of `RegID` together with register + // `RenameAs`. Since this partial write is not renamed, no physical + // register is allocated. + ShouldAllocatePhysRegs = false; + + WriteState *OtherWS = OtherWrite.getWriteState(); + if (OtherWS && (OtherWrite.getSourceIndex() != Write.getSourceIndex())) { + // This partial write has a false dependency on RenameAs. + assert(!IsEliminated && "Unexpected partial update!"); + OtherWS->addUser(OtherWrite.getSourceIndex(), &WS); + } + } + } + + // Update zero registers. + unsigned ZeroRegisterID = + WS.clearsSuperRegisters() ? RegID : WS.getRegisterID(); + if (IsWriteZero) { + ZeroRegisters.setBit(ZeroRegisterID); + for (MCSubRegIterator I(ZeroRegisterID, &MRI); I.isValid(); ++I) + ZeroRegisters.setBit(*I); + } else { + ZeroRegisters.clearBit(ZeroRegisterID); + for (MCSubRegIterator I(ZeroRegisterID, &MRI); I.isValid(); ++I) + ZeroRegisters.clearBit(*I); + } + + // If this is move has been eliminated, then the call to tryEliminateMove + // should have already updated all the register mappings. + if (!IsEliminated) { + // Update the mapping for register RegID including its sub-registers. + RegisterMappings[RegID].first = Write; + RegisterMappings[RegID].second.AliasRegID = 0U; + for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) { + RegisterMappings[*I].first = Write; + RegisterMappings[*I].second.AliasRegID = 0U; + } + + // No physical registers are allocated for instructions that are optimized + // in hardware. For example, zero-latency data-dependency breaking + // instructions don't consume physical registers. + if (ShouldAllocatePhysRegs) + allocatePhysRegs(RegisterMappings[RegID].second, UsedPhysRegs); + } + + if (!WS.clearsSuperRegisters()) + return; + + for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) { + if (!IsEliminated) { + RegisterMappings[*I].first = Write; + RegisterMappings[*I].second.AliasRegID = 0U; + } + + if (IsWriteZero) + ZeroRegisters.setBit(*I); + else + ZeroRegisters.clearBit(*I); + } +} + +void RegisterFile::removeRegisterWrite( + const WriteState &WS, MutableArrayRef<unsigned> FreedPhysRegs) { + // Early exit if this write was eliminated. A write eliminated at register + // renaming stage generates an alias, and it is not added to the PRF. + if (WS.isEliminated()) + return; + + unsigned RegID = WS.getRegisterID(); + + assert(RegID != 0 && "Invalidating an already invalid register?"); + assert(WS.getCyclesLeft() != UNKNOWN_CYCLES && + "Invalidating a write of unknown cycles!"); + assert(WS.getCyclesLeft() <= 0 && "Invalid cycles left for this write!"); + + bool ShouldFreePhysRegs = !WS.isWriteZero(); + unsigned RenameAs = RegisterMappings[RegID].second.RenameAs; + if (RenameAs && RenameAs != RegID) { + RegID = RenameAs; + + if (!WS.clearsSuperRegisters()) { + // Keep the definition of `RegID` together with register `RenameAs`. + ShouldFreePhysRegs = false; + } + } + + if (ShouldFreePhysRegs) + freePhysRegs(RegisterMappings[RegID].second, FreedPhysRegs); + + WriteRef &WR = RegisterMappings[RegID].first; + if (WR.getWriteState() == &WS) + WR.invalidate(); + + for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) { + WriteRef &OtherWR = RegisterMappings[*I].first; + if (OtherWR.getWriteState() == &WS) + OtherWR.invalidate(); + } + + if (!WS.clearsSuperRegisters()) + return; + + for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) { + WriteRef &OtherWR = RegisterMappings[*I].first; + if (OtherWR.getWriteState() == &WS) + OtherWR.invalidate(); + } +} + +bool RegisterFile::tryEliminateMove(WriteState &WS, ReadState &RS) { + const RegisterMapping &RMFrom = RegisterMappings[RS.getRegisterID()]; + const RegisterMapping &RMTo = RegisterMappings[WS.getRegisterID()]; + + // From and To must be owned by the same PRF. + const RegisterRenamingInfo &RRIFrom = RMFrom.second; + const RegisterRenamingInfo &RRITo = RMTo.second; + unsigned RegisterFileIndex = RRIFrom.IndexPlusCost.first; + if (RegisterFileIndex != RRITo.IndexPlusCost.first) + return false; + + // We only allow move elimination for writes that update a full physical + // register. On X86, move elimination is possible with 32-bit general purpose + // registers because writes to those registers are not partial writes. If a + // register move is a partial write, then we conservatively assume that move + // elimination fails, since it would either trigger a partial update, or the + // issue of a merge opcode. + // + // Note that this constraint may be lifted in future. For example, we could + // make this model more flexible, and let users customize the set of registers + // (i.e. register classes) that allow move elimination. + // + // For now, we assume that there is a strong correlation between registers + // that allow move elimination, and how those same registers are renamed in + // hardware. + if (RRITo.RenameAs && RRITo.RenameAs != WS.getRegisterID()) { + // Early exit if the PRF doesn't support move elimination for this register. + if (!RegisterMappings[RRITo.RenameAs].second.AllowMoveElimination) + return false; + if (!WS.clearsSuperRegisters()) + return false; + } + + RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex]; + if (RMT.MaxMoveEliminatedPerCycle && + RMT.NumMoveEliminated == RMT.MaxMoveEliminatedPerCycle) + return false; + + bool IsZeroMove = ZeroRegisters[RS.getRegisterID()]; + if (RMT.AllowZeroMoveEliminationOnly && !IsZeroMove) + return false; + + // Construct an alias. + MCPhysReg AliasedReg = + RRIFrom.RenameAs ? RRIFrom.RenameAs : RS.getRegisterID(); + MCPhysReg AliasReg = RRITo.RenameAs ? RRITo.RenameAs : WS.getRegisterID(); + + const RegisterRenamingInfo &RMAlias = RegisterMappings[AliasedReg].second; + if (RMAlias.AliasRegID) + AliasedReg = RMAlias.AliasRegID; + + RegisterMappings[AliasReg].second.AliasRegID = AliasedReg; + for (MCSubRegIterator I(AliasReg, &MRI); I.isValid(); ++I) + RegisterMappings[*I].second.AliasRegID = AliasedReg; + + if (IsZeroMove) { + WS.setWriteZero(); + RS.setReadZero(); + } + WS.setEliminated(); + RMT.NumMoveEliminated++; + + return true; +} + +void RegisterFile::collectWrites(const ReadState &RS, + SmallVectorImpl<WriteRef> &Writes) const { + unsigned RegID = RS.getRegisterID(); + assert(RegID && RegID < RegisterMappings.size()); + LLVM_DEBUG(dbgs() << "RegisterFile: collecting writes for register " + << MRI.getName(RegID) << '\n'); + + // Check if this is an alias. + const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; + if (RRI.AliasRegID) + RegID = RRI.AliasRegID; + + const WriteRef &WR = RegisterMappings[RegID].first; + if (WR.isValid()) + Writes.push_back(WR); + + // Handle potential partial register updates. + for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) { + const WriteRef &WR = RegisterMappings[*I].first; + if (WR.isValid()) + Writes.push_back(WR); + } + + // Remove duplicate entries and resize the input vector. + if (Writes.size() > 1) { + sort(Writes, [](const WriteRef &Lhs, const WriteRef &Rhs) { + return Lhs.getWriteState() < Rhs.getWriteState(); + }); + auto It = std::unique(Writes.begin(), Writes.end()); + Writes.resize(std::distance(Writes.begin(), It)); + } + + LLVM_DEBUG({ + for (const WriteRef &WR : Writes) { + const WriteState &WS = *WR.getWriteState(); + dbgs() << "[PRF] Found a dependent use of Register " + << MRI.getName(WS.getRegisterID()) << " (defined by instruction #" + << WR.getSourceIndex() << ")\n"; + } + }); +} + +void RegisterFile::addRegisterRead(ReadState &RS, + const MCSubtargetInfo &STI) const { + unsigned RegID = RS.getRegisterID(); + const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; + RS.setPRF(RRI.IndexPlusCost.first); + if (RS.isIndependentFromDef()) + return; + + if (ZeroRegisters[RS.getRegisterID()]) + RS.setReadZero(); + + SmallVector<WriteRef, 4> DependentWrites; + collectWrites(RS, DependentWrites); + RS.setDependentWrites(DependentWrites.size()); + + // We know that this read depends on all the writes in DependentWrites. + // For each write, check if we have ReadAdvance information, and use it + // to figure out in how many cycles this read becomes available. + const ReadDescriptor &RD = RS.getDescriptor(); + const MCSchedModel &SM = STI.getSchedModel(); + const MCSchedClassDesc *SC = SM.getSchedClassDesc(RD.SchedClassID); + for (WriteRef &WR : DependentWrites) { + WriteState &WS = *WR.getWriteState(); + unsigned WriteResID = WS.getWriteResourceID(); + int ReadAdvance = STI.getReadAdvanceCycles(SC, RD.UseIndex, WriteResID); + WS.addUser(WR.getSourceIndex(), &RS, ReadAdvance); + } +} + +unsigned RegisterFile::isAvailable(ArrayRef<unsigned> Regs) const { + SmallVector<unsigned, 4> NumPhysRegs(getNumRegisterFiles()); + + // Find how many new mappings must be created for each register file. + for (const unsigned RegID : Regs) { + const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; + const IndexPlusCostPairTy &Entry = RRI.IndexPlusCost; + if (Entry.first) + NumPhysRegs[Entry.first] += Entry.second; + NumPhysRegs[0] += Entry.second; + } + + unsigned Response = 0; + for (unsigned I = 0, E = getNumRegisterFiles(); I < E; ++I) { + unsigned NumRegs = NumPhysRegs[I]; + if (!NumRegs) + continue; + + const RegisterMappingTracker &RMT = RegisterFiles[I]; + if (!RMT.NumPhysRegs) { + // The register file has an unbounded number of microarchitectural + // registers. + continue; + } + + if (RMT.NumPhysRegs < NumRegs) { + // The current register file is too small. This may occur if the number of + // microarchitectural registers in register file #0 was changed by the + // users via flag -reg-file-size. Alternatively, the scheduling model + // specified a too small number of registers for this register file. + LLVM_DEBUG(dbgs() << "Not enough registers in the register file.\n"); + + // FIXME: Normalize the instruction register count to match the + // NumPhysRegs value. This is a highly unusual case, and is not expected + // to occur. This normalization is hiding an inconsistency in either the + // scheduling model or in the value that the user might have specified + // for NumPhysRegs. + NumRegs = RMT.NumPhysRegs; + } + + if (RMT.NumPhysRegs < (RMT.NumUsedPhysRegs + NumRegs)) + Response |= (1U << I); + } + + return Response; +} + +#ifndef NDEBUG +void RegisterFile::dump() const { + for (unsigned I = 0, E = MRI.getNumRegs(); I < E; ++I) { + const RegisterMapping &RM = RegisterMappings[I]; + const RegisterRenamingInfo &RRI = RM.second; + if (ZeroRegisters[I]) { + dbgs() << MRI.getName(I) << ", " << I + << ", PRF=" << RRI.IndexPlusCost.first + << ", Cost=" << RRI.IndexPlusCost.second + << ", RenameAs=" << RRI.RenameAs << ", IsZero=" << ZeroRegisters[I] + << ","; + RM.first.dump(); + dbgs() << '\n'; + } + } + + for (unsigned I = 0, E = getNumRegisterFiles(); I < E; ++I) { + dbgs() << "Register File #" << I; + const RegisterMappingTracker &RMT = RegisterFiles[I]; + dbgs() << "\n TotalMappings: " << RMT.NumPhysRegs + << "\n NumUsedMappings: " << RMT.NumUsedPhysRegs << '\n'; + } +} +#endif + +} // namespace mca +} // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp b/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp new file mode 100644 index 000000000000..06f2476353d6 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp @@ -0,0 +1,353 @@ +//===--------------------- ResourceManager.cpp ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// The classes here represent processor resource units and their management +/// strategy. These classes are managed by the Scheduler. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/HardwareUnits/ResourceManager.h" +#include "llvm/MCA/Support.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace mca { + +#define DEBUG_TYPE "llvm-mca" +ResourceStrategy::~ResourceStrategy() = default; + +static uint64_t selectImpl(uint64_t CandidateMask, + uint64_t &NextInSequenceMask) { + // The upper bit set in CandidateMask identifies our next candidate resource. + CandidateMask = 1ULL << getResourceStateIndex(CandidateMask); + NextInSequenceMask &= (CandidateMask | (CandidateMask - 1)); + return CandidateMask; +} + +uint64_t DefaultResourceStrategy::select(uint64_t ReadyMask) { + // This method assumes that ReadyMask cannot be zero. + uint64_t CandidateMask = ReadyMask & NextInSequenceMask; + if (CandidateMask) + return selectImpl(CandidateMask, NextInSequenceMask); + + NextInSequenceMask = ResourceUnitMask ^ RemovedFromNextInSequence; + RemovedFromNextInSequence = 0; + CandidateMask = ReadyMask & NextInSequenceMask; + if (CandidateMask) + return selectImpl(CandidateMask, NextInSequenceMask); + + NextInSequenceMask = ResourceUnitMask; + CandidateMask = ReadyMask & NextInSequenceMask; + return selectImpl(CandidateMask, NextInSequenceMask); +} + +void DefaultResourceStrategy::used(uint64_t Mask) { + if (Mask > NextInSequenceMask) { + RemovedFromNextInSequence |= Mask; + return; + } + + NextInSequenceMask &= (~Mask); + if (NextInSequenceMask) + return; + + NextInSequenceMask = ResourceUnitMask ^ RemovedFromNextInSequence; + RemovedFromNextInSequence = 0; +} + +ResourceState::ResourceState(const MCProcResourceDesc &Desc, unsigned Index, + uint64_t Mask) + : ProcResourceDescIndex(Index), ResourceMask(Mask), + BufferSize(Desc.BufferSize), IsAGroup(countPopulation(ResourceMask) > 1) { + if (IsAGroup) { + ResourceSizeMask = + ResourceMask ^ 1ULL << getResourceStateIndex(ResourceMask); + } else { + ResourceSizeMask = (1ULL << Desc.NumUnits) - 1; + } + ReadyMask = ResourceSizeMask; + AvailableSlots = BufferSize == -1 ? 0U : static_cast<unsigned>(BufferSize); + Unavailable = false; +} + +bool ResourceState::isReady(unsigned NumUnits) const { + return (!isReserved() || isADispatchHazard()) && + countPopulation(ReadyMask) >= NumUnits; +} + +ResourceStateEvent ResourceState::isBufferAvailable() const { + if (isADispatchHazard() && isReserved()) + return RS_RESERVED; + if (!isBuffered() || AvailableSlots) + return RS_BUFFER_AVAILABLE; + return RS_BUFFER_UNAVAILABLE; +} + +#ifndef NDEBUG +void ResourceState::dump() const { + dbgs() << "MASK=" << format_hex(ResourceMask, 16) + << ", SZMASK=" << format_hex(ResourceSizeMask, 16) + << ", RDYMASK=" << format_hex(ReadyMask, 16) + << ", BufferSize=" << BufferSize + << ", AvailableSlots=" << AvailableSlots + << ", Reserved=" << Unavailable << '\n'; +} +#endif + +static std::unique_ptr<ResourceStrategy> +getStrategyFor(const ResourceState &RS) { + if (RS.isAResourceGroup() || RS.getNumUnits() > 1) + return llvm::make_unique<DefaultResourceStrategy>(RS.getReadyMask()); + return std::unique_ptr<ResourceStrategy>(nullptr); +} + +ResourceManager::ResourceManager(const MCSchedModel &SM) + : Resources(SM.getNumProcResourceKinds() - 1), + Strategies(SM.getNumProcResourceKinds() - 1), + Resource2Groups(SM.getNumProcResourceKinds() - 1, 0), + ProcResID2Mask(SM.getNumProcResourceKinds(), 0), + ResIndex2ProcResID(SM.getNumProcResourceKinds() - 1, 0), + ProcResUnitMask(0), ReservedResourceGroups(0) { + computeProcResourceMasks(SM, ProcResID2Mask); + + // initialize vector ResIndex2ProcResID. + for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) { + unsigned Index = getResourceStateIndex(ProcResID2Mask[I]); + ResIndex2ProcResID[Index] = I; + } + + for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) { + uint64_t Mask = ProcResID2Mask[I]; + unsigned Index = getResourceStateIndex(Mask); + Resources[Index] = + llvm::make_unique<ResourceState>(*SM.getProcResource(I), I, Mask); + Strategies[Index] = getStrategyFor(*Resources[Index]); + } + + for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) { + uint64_t Mask = ProcResID2Mask[I]; + unsigned Index = getResourceStateIndex(Mask); + const ResourceState &RS = *Resources[Index]; + if (!RS.isAResourceGroup()) { + ProcResUnitMask |= Mask; + continue; + } + + uint64_t GroupMaskIdx = 1ULL << Index; + Mask -= GroupMaskIdx; + while (Mask) { + // Extract lowest set isolated bit. + uint64_t Unit = Mask & (-Mask); + unsigned IndexUnit = getResourceStateIndex(Unit); + Resource2Groups[IndexUnit] |= GroupMaskIdx; + Mask ^= Unit; + } + } + + AvailableProcResUnits = ProcResUnitMask; +} + +void ResourceManager::setCustomStrategyImpl(std::unique_ptr<ResourceStrategy> S, + uint64_t ResourceMask) { + unsigned Index = getResourceStateIndex(ResourceMask); + assert(Index < Resources.size() && "Invalid processor resource index!"); + assert(S && "Unexpected null strategy in input!"); + Strategies[Index] = std::move(S); +} + +unsigned ResourceManager::resolveResourceMask(uint64_t Mask) const { + return ResIndex2ProcResID[getResourceStateIndex(Mask)]; +} + +unsigned ResourceManager::getNumUnits(uint64_t ResourceID) const { + return Resources[getResourceStateIndex(ResourceID)]->getNumUnits(); +} + +// Returns the actual resource consumed by this Use. +// First, is the primary resource ID. +// Second, is the specific sub-resource ID. +ResourceRef ResourceManager::selectPipe(uint64_t ResourceID) { + unsigned Index = getResourceStateIndex(ResourceID); + assert(Index < Resources.size() && "Invalid resource use!"); + ResourceState &RS = *Resources[Index]; + assert(RS.isReady() && "No available units to select!"); + + // Special case where RS is not a group, and it only declares a single + // resource unit. + if (!RS.isAResourceGroup() && RS.getNumUnits() == 1) + return std::make_pair(ResourceID, RS.getReadyMask()); + + uint64_t SubResourceID = Strategies[Index]->select(RS.getReadyMask()); + if (RS.isAResourceGroup()) + return selectPipe(SubResourceID); + return std::make_pair(ResourceID, SubResourceID); +} + +void ResourceManager::use(const ResourceRef &RR) { + // Mark the sub-resource referenced by RR as used. + unsigned RSID = getResourceStateIndex(RR.first); + ResourceState &RS = *Resources[RSID]; + RS.markSubResourceAsUsed(RR.second); + // Remember to update the resource strategy for non-group resources with + // multiple units. + if (RS.getNumUnits() > 1) + Strategies[RSID]->used(RR.second); + + // If there are still available units in RR.first, + // then we are done. + if (RS.isReady()) + return; + + AvailableProcResUnits ^= RR.first; + + // Notify groups that RR.first is no longer available. + uint64_t Users = Resource2Groups[RSID]; + while (Users) { + // Extract lowest set isolated bit. + unsigned GroupIndex = getResourceStateIndex(Users & (-Users)); + ResourceState &CurrentUser = *Resources[GroupIndex]; + CurrentUser.markSubResourceAsUsed(RR.first); + Strategies[GroupIndex]->used(RR.first); + // Reset lowest set bit. + Users &= Users - 1; + } +} + +void ResourceManager::release(const ResourceRef &RR) { + unsigned RSID = getResourceStateIndex(RR.first); + ResourceState &RS = *Resources[RSID]; + bool WasFullyUsed = !RS.isReady(); + RS.releaseSubResource(RR.second); + if (!WasFullyUsed) + return; + + AvailableProcResUnits ^= RR.first; + + // Notify groups that RR.first is now available again. + uint64_t Users = Resource2Groups[RSID]; + while (Users) { + unsigned GroupIndex = getResourceStateIndex(Users & (-Users)); + ResourceState &CurrentUser = *Resources[GroupIndex]; + CurrentUser.releaseSubResource(RR.first); + Users &= Users - 1; + } +} + +ResourceStateEvent +ResourceManager::canBeDispatched(ArrayRef<uint64_t> Buffers) const { + ResourceStateEvent Result = ResourceStateEvent::RS_BUFFER_AVAILABLE; + for (uint64_t Buffer : Buffers) { + ResourceState &RS = *Resources[getResourceStateIndex(Buffer)]; + Result = RS.isBufferAvailable(); + if (Result != ResourceStateEvent::RS_BUFFER_AVAILABLE) + break; + } + return Result; +} + +void ResourceManager::reserveBuffers(ArrayRef<uint64_t> Buffers) { + for (const uint64_t Buffer : Buffers) { + ResourceState &RS = *Resources[getResourceStateIndex(Buffer)]; + assert(RS.isBufferAvailable() == ResourceStateEvent::RS_BUFFER_AVAILABLE); + RS.reserveBuffer(); + + if (RS.isADispatchHazard()) { + assert(!RS.isReserved()); + RS.setReserved(); + } + } +} + +void ResourceManager::releaseBuffers(ArrayRef<uint64_t> Buffers) { + for (const uint64_t R : Buffers) + Resources[getResourceStateIndex(R)]->releaseBuffer(); +} + +uint64_t ResourceManager::checkAvailability(const InstrDesc &Desc) const { + uint64_t BusyResourceMask = 0; + for (const std::pair<uint64_t, const ResourceUsage> &E : Desc.Resources) { + unsigned NumUnits = E.second.isReserved() ? 0U : E.second.NumUnits; + unsigned Index = getResourceStateIndex(E.first); + if (!Resources[Index]->isReady(NumUnits)) + BusyResourceMask |= E.first; + } + + BusyResourceMask &= ProcResUnitMask; + if (BusyResourceMask) + return BusyResourceMask; + return Desc.UsedProcResGroups & ReservedResourceGroups; +} + +void ResourceManager::issueInstruction( + const InstrDesc &Desc, + SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &Pipes) { + for (const std::pair<uint64_t, ResourceUsage> &R : Desc.Resources) { + const CycleSegment &CS = R.second.CS; + if (!CS.size()) { + releaseResource(R.first); + continue; + } + + assert(CS.begin() == 0 && "Invalid {Start, End} cycles!"); + if (!R.second.isReserved()) { + ResourceRef Pipe = selectPipe(R.first); + use(Pipe); + BusyResources[Pipe] += CS.size(); + Pipes.emplace_back(std::pair<ResourceRef, ResourceCycles>( + Pipe, ResourceCycles(CS.size()))); + } else { + assert((countPopulation(R.first) > 1) && "Expected a group!"); + // Mark this group as reserved. + assert(R.second.isReserved()); + reserveResource(R.first); + BusyResources[ResourceRef(R.first, R.first)] += CS.size(); + } + } +} + +void ResourceManager::cycleEvent(SmallVectorImpl<ResourceRef> &ResourcesFreed) { + for (std::pair<ResourceRef, unsigned> &BR : BusyResources) { + if (BR.second) + BR.second--; + if (!BR.second) { + // Release this resource. + const ResourceRef &RR = BR.first; + + if (countPopulation(RR.first) == 1) + release(RR); + + releaseResource(RR.first); + ResourcesFreed.push_back(RR); + } + } + + for (const ResourceRef &RF : ResourcesFreed) + BusyResources.erase(RF); +} + +void ResourceManager::reserveResource(uint64_t ResourceID) { + const unsigned Index = getResourceStateIndex(ResourceID); + ResourceState &Resource = *Resources[Index]; + assert(Resource.isAResourceGroup() && !Resource.isReserved() && + "Unexpected resource found!"); + Resource.setReserved(); + ReservedResourceGroups ^= 1ULL << Index; +} + +void ResourceManager::releaseResource(uint64_t ResourceID) { + const unsigned Index = getResourceStateIndex(ResourceID); + ResourceState &Resource = *Resources[Index]; + Resource.clearReserved(); + if (Resource.isAResourceGroup()) + ReservedResourceGroups ^= 1ULL << Index; +} + +} // namespace mca +} // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp b/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp new file mode 100644 index 000000000000..068c5062ccdf --- /dev/null +++ b/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp @@ -0,0 +1,87 @@ +//===---------------------- RetireControlUnit.cpp ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file simulates the hardware responsible for retiring instructions. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/HardwareUnits/RetireControlUnit.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "llvm-mca" + +namespace llvm { +namespace mca { + +RetireControlUnit::RetireControlUnit(const MCSchedModel &SM) + : NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0), + AvailableSlots(SM.MicroOpBufferSize), MaxRetirePerCycle(0) { + // Check if the scheduling model provides extra information about the machine + // processor. If so, then use that information to set the reorder buffer size + // and the maximum number of instructions retired per cycle. + if (SM.hasExtraProcessorInfo()) { + const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo(); + if (EPI.ReorderBufferSize) + AvailableSlots = EPI.ReorderBufferSize; + MaxRetirePerCycle = EPI.MaxRetirePerCycle; + } + + assert(AvailableSlots && "Invalid reorder buffer size!"); + Queue.resize(AvailableSlots); +} + +// Reserves a number of slots, and returns a new token. +unsigned RetireControlUnit::reserveSlot(const InstRef &IR, + unsigned NumMicroOps) { + assert(isAvailable(NumMicroOps) && "Reorder Buffer unavailable!"); + unsigned NormalizedQuantity = + std::min(NumMicroOps, static_cast<unsigned>(Queue.size())); + // Zero latency instructions may have zero uOps. Artificially bump this + // value to 1. Although zero latency instructions don't consume scheduler + // resources, they still consume one slot in the retire queue. + NormalizedQuantity = std::max(NormalizedQuantity, 1U); + unsigned TokenID = NextAvailableSlotIdx; + Queue[NextAvailableSlotIdx] = {IR, NormalizedQuantity, false}; + NextAvailableSlotIdx += NormalizedQuantity; + NextAvailableSlotIdx %= Queue.size(); + AvailableSlots -= NormalizedQuantity; + return TokenID; +} + +const RetireControlUnit::RUToken &RetireControlUnit::peekCurrentToken() const { + return Queue[CurrentInstructionSlotIdx]; +} + +void RetireControlUnit::consumeCurrentToken() { + RetireControlUnit::RUToken &Current = Queue[CurrentInstructionSlotIdx]; + assert(Current.NumSlots && "Reserved zero slots?"); + assert(Current.IR && "Invalid RUToken in the RCU queue."); + Current.IR.getInstruction()->retire(); + + // Update the slot index to be the next item in the circular queue. + CurrentInstructionSlotIdx += Current.NumSlots; + CurrentInstructionSlotIdx %= Queue.size(); + AvailableSlots += Current.NumSlots; +} + +void RetireControlUnit::onInstructionExecuted(unsigned TokenID) { + assert(Queue.size() > TokenID); + assert(Queue[TokenID].Executed == false && Queue[TokenID].IR); + Queue[TokenID].Executed = true; +} + +#ifndef NDEBUG +void RetireControlUnit::dump() const { + dbgs() << "Retire Unit: { Total Slots=" << Queue.size() + << ", Available Slots=" << AvailableSlots << " }\n"; +} +#endif + +} // namespace mca +} // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/Scheduler.cpp b/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/Scheduler.cpp new file mode 100644 index 000000000000..0f0f2ffb8325 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/Scheduler.cpp @@ -0,0 +1,343 @@ +//===--------------------- Scheduler.cpp ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A scheduler for processor resource units and processor resource groups. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/HardwareUnits/Scheduler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace mca { + +#define DEBUG_TYPE "llvm-mca" + +void Scheduler::initializeStrategy(std::unique_ptr<SchedulerStrategy> S) { + // Ensure we have a valid (non-null) strategy object. + Strategy = S ? std::move(S) : llvm::make_unique<DefaultSchedulerStrategy>(); +} + +// Anchor the vtable of SchedulerStrategy and DefaultSchedulerStrategy. +SchedulerStrategy::~SchedulerStrategy() = default; +DefaultSchedulerStrategy::~DefaultSchedulerStrategy() = default; + +#ifndef NDEBUG +void Scheduler::dump() const { + dbgs() << "[SCHEDULER]: WaitSet size is: " << WaitSet.size() << '\n'; + dbgs() << "[SCHEDULER]: ReadySet size is: " << ReadySet.size() << '\n'; + dbgs() << "[SCHEDULER]: IssuedSet size is: " << IssuedSet.size() << '\n'; + Resources->dump(); +} +#endif + +Scheduler::Status Scheduler::isAvailable(const InstRef &IR) { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + + ResourceStateEvent RSE = Resources->canBeDispatched(Desc.Buffers); + HadTokenStall = RSE != RS_BUFFER_AVAILABLE; + + switch (RSE) { + case ResourceStateEvent::RS_BUFFER_UNAVAILABLE: + return Scheduler::SC_BUFFERS_FULL; + case ResourceStateEvent::RS_RESERVED: + return Scheduler::SC_DISPATCH_GROUP_STALL; + case ResourceStateEvent::RS_BUFFER_AVAILABLE: + break; + } + + // Give lower priority to LSUnit stall events. + LSUnit::Status LSS = LSU.isAvailable(IR); + HadTokenStall = LSS != LSUnit::LSU_AVAILABLE; + + switch (LSS) { + case LSUnit::LSU_LQUEUE_FULL: + return Scheduler::SC_LOAD_QUEUE_FULL; + case LSUnit::LSU_SQUEUE_FULL: + return Scheduler::SC_STORE_QUEUE_FULL; + case LSUnit::LSU_AVAILABLE: + return Scheduler::SC_AVAILABLE; + } + + llvm_unreachable("Don't know how to process this LSU state result!"); +} + +void Scheduler::issueInstructionImpl( + InstRef &IR, + SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &UsedResources) { + Instruction *IS = IR.getInstruction(); + const InstrDesc &D = IS->getDesc(); + + // Issue the instruction and collect all the consumed resources + // into a vector. That vector is then used to notify the listener. + Resources->issueInstruction(D, UsedResources); + + // Notify the instruction that it started executing. + // This updates the internal state of each write. + IS->execute(IR.getSourceIndex()); + + IS->computeCriticalRegDep(); + + if (IS->isMemOp()) { + LSU.onInstructionIssued(IR); + const MemoryGroup &Group = LSU.getGroup(IS->getLSUTokenID()); + IS->setCriticalMemDep(Group.getCriticalPredecessor()); + } + + if (IS->isExecuting()) + IssuedSet.emplace_back(IR); + else if (IS->isExecuted()) + LSU.onInstructionExecuted(IR); +} + +// Release the buffered resources and issue the instruction. +void Scheduler::issueInstruction( + InstRef &IR, + SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &UsedResources, + SmallVectorImpl<InstRef> &PendingInstructions, + SmallVectorImpl<InstRef> &ReadyInstructions) { + const Instruction &Inst = *IR.getInstruction(); + bool HasDependentUsers = Inst.hasDependentUsers(); + HasDependentUsers |= Inst.isMemOp() && LSU.hasDependentUsers(IR); + + Resources->releaseBuffers(Inst.getDesc().Buffers); + issueInstructionImpl(IR, UsedResources); + // Instructions that have been issued during this cycle might have unblocked + // other dependent instructions. Dependent instructions may be issued during + // this same cycle if operands have ReadAdvance entries. Promote those + // instructions to the ReadySet and notify the caller that those are ready. + if (HasDependentUsers) + if (promoteToPendingSet(PendingInstructions)) + promoteToReadySet(ReadyInstructions); +} + +bool Scheduler::promoteToReadySet(SmallVectorImpl<InstRef> &Ready) { + // Scan the set of waiting instructions and promote them to the + // ready set if operands are all ready. + unsigned PromotedElements = 0; + for (auto I = PendingSet.begin(), E = PendingSet.end(); I != E;) { + InstRef &IR = *I; + if (!IR) + break; + + // Check if there are unsolved register dependencies. + Instruction &IS = *IR.getInstruction(); + if (!IS.isReady() && !IS.updatePending()) { + ++I; + continue; + } + // Check if there are unsolved memory dependencies. + if (IS.isMemOp() && !LSU.isReady(IR)) { + ++I; + continue; + } + + LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << IR + << " promoted to the READY set.\n"); + + Ready.emplace_back(IR); + ReadySet.emplace_back(IR); + + IR.invalidate(); + ++PromotedElements; + std::iter_swap(I, E - PromotedElements); + } + + PendingSet.resize(PendingSet.size() - PromotedElements); + return PromotedElements; +} + +bool Scheduler::promoteToPendingSet(SmallVectorImpl<InstRef> &Pending) { + // Scan the set of waiting instructions and promote them to the + // pending set if operands are all ready. + unsigned RemovedElements = 0; + for (auto I = WaitSet.begin(), E = WaitSet.end(); I != E;) { + InstRef &IR = *I; + if (!IR) + break; + + // Check if this instruction is now ready. In case, force + // a transition in state using method 'updateDispatched()'. + Instruction &IS = *IR.getInstruction(); + if (IS.isDispatched() && !IS.updateDispatched()) { + ++I; + continue; + } + + if (IS.isMemOp() && LSU.isWaiting(IR)) { + ++I; + continue; + } + + LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << IR + << " promoted to the PENDING set.\n"); + + Pending.emplace_back(IR); + PendingSet.emplace_back(IR); + + IR.invalidate(); + ++RemovedElements; + std::iter_swap(I, E - RemovedElements); + } + + WaitSet.resize(WaitSet.size() - RemovedElements); + return RemovedElements; +} + +InstRef Scheduler::select() { + unsigned QueueIndex = ReadySet.size(); + for (unsigned I = 0, E = ReadySet.size(); I != E; ++I) { + InstRef &IR = ReadySet[I]; + if (QueueIndex == ReadySet.size() || + Strategy->compare(IR, ReadySet[QueueIndex])) { + Instruction &IS = *IR.getInstruction(); + uint64_t BusyResourceMask = Resources->checkAvailability(IS.getDesc()); + if (BusyResourceMask) + IS.setCriticalResourceMask(BusyResourceMask); + BusyResourceUnits |= BusyResourceMask; + if (!BusyResourceMask) + QueueIndex = I; + } + } + + if (QueueIndex == ReadySet.size()) + return InstRef(); + + // We found an instruction to issue. + InstRef IR = ReadySet[QueueIndex]; + std::swap(ReadySet[QueueIndex], ReadySet[ReadySet.size() - 1]); + ReadySet.pop_back(); + return IR; +} + +void Scheduler::updateIssuedSet(SmallVectorImpl<InstRef> &Executed) { + unsigned RemovedElements = 0; + for (auto I = IssuedSet.begin(), E = IssuedSet.end(); I != E;) { + InstRef &IR = *I; + if (!IR) + break; + Instruction &IS = *IR.getInstruction(); + if (!IS.isExecuted()) { + LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << IR + << " is still executing.\n"); + ++I; + continue; + } + + // Instruction IR has completed execution. + LSU.onInstructionExecuted(IR); + Executed.emplace_back(IR); + ++RemovedElements; + IR.invalidate(); + std::iter_swap(I, E - RemovedElements); + } + + IssuedSet.resize(IssuedSet.size() - RemovedElements); +} + +uint64_t Scheduler::analyzeResourcePressure(SmallVectorImpl<InstRef> &Insts) { + Insts.insert(Insts.end(), ReadySet.begin(), ReadySet.end()); + return BusyResourceUnits; +} + +void Scheduler::analyzeDataDependencies(SmallVectorImpl<InstRef> &RegDeps, + SmallVectorImpl<InstRef> &MemDeps) { + const auto EndIt = PendingSet.end() - NumDispatchedToThePendingSet; + for (const InstRef &IR : make_range(PendingSet.begin(), EndIt)) { + const Instruction &IS = *IR.getInstruction(); + if (Resources->checkAvailability(IS.getDesc())) + continue; + + if (IS.isMemOp() && LSU.isPending(IR)) + MemDeps.emplace_back(IR); + + if (IS.isPending()) + RegDeps.emplace_back(IR); + } +} + +void Scheduler::cycleEvent(SmallVectorImpl<ResourceRef> &Freed, + SmallVectorImpl<InstRef> &Executed, + SmallVectorImpl<InstRef> &Pending, + SmallVectorImpl<InstRef> &Ready) { + LSU.cycleEvent(); + + // Release consumed resources. + Resources->cycleEvent(Freed); + + for (InstRef &IR : IssuedSet) + IR.getInstruction()->cycleEvent(); + updateIssuedSet(Executed); + + for (InstRef &IR : PendingSet) + IR.getInstruction()->cycleEvent(); + + for (InstRef &IR : WaitSet) + IR.getInstruction()->cycleEvent(); + + promoteToPendingSet(Pending); + promoteToReadySet(Ready); + + NumDispatchedToThePendingSet = 0; + BusyResourceUnits = 0; +} + +bool Scheduler::mustIssueImmediately(const InstRef &IR) const { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + if (Desc.isZeroLatency()) + return true; + // Instructions that use an in-order dispatch/issue processor resource must be + // issued immediately to the pipeline(s). Any other in-order buffered + // resources (i.e. BufferSize=1) is consumed. + return Desc.MustIssueImmediately; +} + +bool Scheduler::dispatch(InstRef &IR) { + Instruction &IS = *IR.getInstruction(); + const InstrDesc &Desc = IS.getDesc(); + Resources->reserveBuffers(Desc.Buffers); + + // If necessary, reserve queue entries in the load-store unit (LSU). + if (IS.isMemOp()) + IS.setLSUTokenID(LSU.dispatch(IR)); + + if (IS.isDispatched() || (IS.isMemOp() && LSU.isWaiting(IR))) { + LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the WaitSet\n"); + WaitSet.push_back(IR); + return false; + } + + if (IS.isPending() || (IS.isMemOp() && LSU.isPending(IR))) { + LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR + << " to the PendingSet\n"); + PendingSet.push_back(IR); + ++NumDispatchedToThePendingSet; + return false; + } + + assert(IS.isReady() && (!IS.isMemOp() || LSU.isReady(IR)) && + "Unexpected internal state found!"); + // Don't add a zero-latency instruction to the Ready queue. + // A zero-latency instruction doesn't consume any scheduler resources. That is + // because it doesn't need to be executed, and it is often removed at register + // renaming stage. For example, register-register moves are often optimized at + // register renaming stage by simply updating register aliases. On some + // targets, zero-idiom instructions (for example: a xor that clears the value + // of a register) are treated specially, and are often eliminated at register + // renaming stage. + if (!mustIssueImmediately(IR)) { + LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the ReadySet\n"); + ReadySet.push_back(IR); + } + + return true; +} + +} // namespace mca +} // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/MCA/InstrBuilder.cpp b/contrib/llvm-project/llvm/lib/MCA/InstrBuilder.cpp new file mode 100644 index 000000000000..829920366c90 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/MCA/InstrBuilder.cpp @@ -0,0 +1,717 @@ +//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the InstrBuilder interface. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/InstrBuilder.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/WithColor.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "llvm-mca" + +namespace llvm { +namespace mca { + +InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti, + const llvm::MCInstrInfo &mcii, + const llvm::MCRegisterInfo &mri, + const llvm::MCInstrAnalysis *mcia) + : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), FirstCallInst(true), + FirstReturnInst(true) { + const MCSchedModel &SM = STI.getSchedModel(); + ProcResourceMasks.resize(SM.getNumProcResourceKinds()); + computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks); +} + +static void initializeUsedResources(InstrDesc &ID, + const MCSchedClassDesc &SCDesc, + const MCSubtargetInfo &STI, + ArrayRef<uint64_t> ProcResourceMasks) { + const MCSchedModel &SM = STI.getSchedModel(); + + // Populate resources consumed. + using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>; + std::vector<ResourcePlusCycles> Worklist; + + // Track cycles contributed by resources that are in a "Super" relationship. + // This is required if we want to correctly match the behavior of method + // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set + // of "consumed" processor resources and resource cycles, the logic in + // ExpandProcResource() doesn't update the number of resource cycles + // contributed by a "Super" resource to a group. + // We need to take this into account when we find that a processor resource is + // part of a group, and it is also used as the "Super" of other resources. + // This map stores the number of cycles contributed by sub-resources that are + // part of a "Super" resource. The key value is the "Super" resource mask ID. + DenseMap<uint64_t, unsigned> SuperResources; + + unsigned NumProcResources = SM.getNumProcResourceKinds(); + APInt Buffers(NumProcResources, 0); + + bool AllInOrderResources = true; + bool AnyDispatchHazards = false; + for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) { + const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I; + const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx); + if (!PRE->Cycles) { +#ifndef NDEBUG + WithColor::warning() + << "Ignoring invalid write of zero cycles on processor resource " + << PR.Name << "\n"; + WithColor::note() << "found in scheduling class " << SCDesc.Name + << " (write index #" << I << ")\n"; +#endif + continue; + } + + uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx]; + if (PR.BufferSize < 0) { + AllInOrderResources = false; + } else { + Buffers.setBit(PRE->ProcResourceIdx); + AnyDispatchHazards |= (PR.BufferSize == 0); + AllInOrderResources &= (PR.BufferSize <= 1); + } + + CycleSegment RCy(0, PRE->Cycles, false); + Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy))); + if (PR.SuperIdx) { + uint64_t Super = ProcResourceMasks[PR.SuperIdx]; + SuperResources[Super] += PRE->Cycles; + } + } + + ID.MustIssueImmediately = AllInOrderResources && AnyDispatchHazards; + + // Sort elements by mask popcount, so that we prioritize resource units over + // resource groups, and smaller groups over larger groups. + sort(Worklist, [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) { + unsigned popcntA = countPopulation(A.first); + unsigned popcntB = countPopulation(B.first); + if (popcntA < popcntB) + return true; + if (popcntA > popcntB) + return false; + return A.first < B.first; + }); + + uint64_t UsedResourceUnits = 0; + uint64_t UsedResourceGroups = 0; + + // Remove cycles contributed by smaller resources. + for (unsigned I = 0, E = Worklist.size(); I < E; ++I) { + ResourcePlusCycles &A = Worklist[I]; + if (!A.second.size()) { + assert(countPopulation(A.first) > 1 && "Expected a group!"); + UsedResourceGroups |= PowerOf2Floor(A.first); + continue; + } + + ID.Resources.emplace_back(A); + uint64_t NormalizedMask = A.first; + if (countPopulation(A.first) == 1) { + UsedResourceUnits |= A.first; + } else { + // Remove the leading 1 from the resource group mask. + NormalizedMask ^= PowerOf2Floor(NormalizedMask); + UsedResourceGroups |= (A.first ^ NormalizedMask); + } + + for (unsigned J = I + 1; J < E; ++J) { + ResourcePlusCycles &B = Worklist[J]; + if ((NormalizedMask & B.first) == NormalizedMask) { + B.second.CS.subtract(A.second.size() - SuperResources[A.first]); + if (countPopulation(B.first) > 1) + B.second.NumUnits++; + } + } + } + + ID.UsedProcResUnits = UsedResourceUnits; + ID.UsedProcResGroups = UsedResourceGroups; + + // A SchedWrite may specify a number of cycles in which a resource group + // is reserved. For example (on target x86; cpu Haswell): + // + // SchedWriteRes<[HWPort0, HWPort1, HWPort01]> { + // let ResourceCycles = [2, 2, 3]; + // } + // + // This means: + // Resource units HWPort0 and HWPort1 are both used for 2cy. + // Resource group HWPort01 is the union of HWPort0 and HWPort1. + // Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01 + // will not be usable for 2 entire cycles from instruction issue. + // + // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency + // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an + // extra delay on top of the 2 cycles latency. + // During those extra cycles, HWPort01 is not usable by other instructions. + for (ResourcePlusCycles &RPC : ID.Resources) { + if (countPopulation(RPC.first) > 1 && !RPC.second.isReserved()) { + // Remove the leading 1 from the resource group mask. + uint64_t Mask = RPC.first ^ PowerOf2Floor(RPC.first); + if ((Mask & UsedResourceUnits) == Mask) + RPC.second.setReserved(); + } + } + + // Identify extra buffers that are consumed through super resources. + for (const std::pair<uint64_t, unsigned> &SR : SuperResources) { + for (unsigned I = 1, E = NumProcResources; I < E; ++I) { + const MCProcResourceDesc &PR = *SM.getProcResource(I); + if (PR.BufferSize == -1) + continue; + + uint64_t Mask = ProcResourceMasks[I]; + if (Mask != SR.first && ((Mask & SR.first) == SR.first)) + Buffers.setBit(I); + } + } + + // Now set the buffers. + if (unsigned NumBuffers = Buffers.countPopulation()) { + ID.Buffers.resize(NumBuffers); + for (unsigned I = 0, E = NumProcResources; I < E && NumBuffers; ++I) { + if (Buffers[I]) { + --NumBuffers; + ID.Buffers[NumBuffers] = ProcResourceMasks[I]; + } + } + } + + LLVM_DEBUG({ + for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources) + dbgs() << "\t\tResource Mask=" << format_hex(R.first, 16) << ", " + << "Reserved=" << R.second.isReserved() << ", " + << "#Units=" << R.second.NumUnits << ", " + << "cy=" << R.second.size() << '\n'; + for (const uint64_t R : ID.Buffers) + dbgs() << "\t\tBuffer Mask=" << format_hex(R, 16) << '\n'; + dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n'; + dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16) + << '\n'; + }); +} + +static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc, + const MCSchedClassDesc &SCDesc, + const MCSubtargetInfo &STI) { + if (MCDesc.isCall()) { + // We cannot estimate how long this call will take. + // Artificially set an arbitrarily high latency (100cy). + ID.MaxLatency = 100U; + return; + } + + int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc); + // If latency is unknown, then conservatively assume a MaxLatency of 100cy. + ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(Latency); +} + +static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) { + // Count register definitions, and skip non register operands in the process. + unsigned I, E; + unsigned NumExplicitDefs = MCDesc.getNumDefs(); + for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) { + const MCOperand &Op = MCI.getOperand(I); + if (Op.isReg()) + --NumExplicitDefs; + } + + if (NumExplicitDefs) { + return make_error<InstructionError<MCInst>>( + "Expected more register operand definitions.", MCI); + } + + if (MCDesc.hasOptionalDef()) { + // Always assume that the optional definition is the last operand. + const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1); + if (I == MCI.getNumOperands() || !Op.isReg()) { + std::string Message = + "expected a register operand for an optional definition. Instruction " + "has not been correctly analyzed."; + return make_error<InstructionError<MCInst>>(Message, MCI); + } + } + + return ErrorSuccess(); +} + +void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI, + unsigned SchedClassID) { + const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode()); + const MCSchedModel &SM = STI.getSchedModel(); + const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID); + + // Assumptions made by this algorithm: + // 1. The number of explicit and implicit register definitions in a MCInst + // matches the number of explicit and implicit definitions according to + // the opcode descriptor (MCInstrDesc). + // 2. Uses start at index #(MCDesc.getNumDefs()). + // 3. There can only be a single optional register definition, an it is + // always the last operand of the sequence (excluding extra operands + // contributed by variadic opcodes). + // + // These assumptions work quite well for most out-of-order in-tree targets + // like x86. This is mainly because the vast majority of instructions is + // expanded to MCInst using a straightforward lowering logic that preserves + // the ordering of the operands. + // + // About assumption 1. + // The algorithm allows non-register operands between register operand + // definitions. This helps to handle some special ARM instructions with + // implicit operand increment (-mtriple=armv7): + // + // vld1.32 {d18, d19}, [r1]! @ <MCInst #1463 VLD1q32wb_fixed + // @ <MCOperand Reg:59> + // @ <MCOperand Imm:0> (!!) + // @ <MCOperand Reg:67> + // @ <MCOperand Imm:0> + // @ <MCOperand Imm:14> + // @ <MCOperand Reg:0>> + // + // MCDesc reports: + // 6 explicit operands. + // 1 optional definition + // 2 explicit definitions (!!) + // + // The presence of an 'Imm' operand between the two register definitions + // breaks the assumption that "register definitions are always at the + // beginning of the operand sequence". + // + // To workaround this issue, this algorithm ignores (i.e. skips) any + // non-register operands between register definitions. The optional + // definition is still at index #(NumOperands-1). + // + // According to assumption 2. register reads start at #(NumExplicitDefs-1). + // That means, register R1 from the example is both read and written. + unsigned NumExplicitDefs = MCDesc.getNumDefs(); + unsigned NumImplicitDefs = MCDesc.getNumImplicitDefs(); + unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries; + unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs; + if (MCDesc.hasOptionalDef()) + TotalDefs++; + + unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands(); + ID.Writes.resize(TotalDefs + NumVariadicOps); + // Iterate over the operands list, and skip non-register operands. + // The first NumExplictDefs register operands are expected to be register + // definitions. + unsigned CurrentDef = 0; + unsigned i = 0; + for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) { + const MCOperand &Op = MCI.getOperand(i); + if (!Op.isReg()) + continue; + + WriteDescriptor &Write = ID.Writes[CurrentDef]; + Write.OpIndex = i; + if (CurrentDef < NumWriteLatencyEntries) { + const MCWriteLatencyEntry &WLE = + *STI.getWriteLatencyEntry(&SCDesc, CurrentDef); + // Conservatively default to MaxLatency. + Write.Latency = + WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles); + Write.SClassOrWriteResourceID = WLE.WriteResourceID; + } else { + // Assign a default latency for this write. + Write.Latency = ID.MaxLatency; + Write.SClassOrWriteResourceID = 0; + } + Write.IsOptionalDef = false; + LLVM_DEBUG({ + dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex + << ", Latency=" << Write.Latency + << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; + }); + CurrentDef++; + } + + assert(CurrentDef == NumExplicitDefs && + "Expected more register operand definitions."); + for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) { + unsigned Index = NumExplicitDefs + CurrentDef; + WriteDescriptor &Write = ID.Writes[Index]; + Write.OpIndex = ~CurrentDef; + Write.RegisterID = MCDesc.getImplicitDefs()[CurrentDef]; + if (Index < NumWriteLatencyEntries) { + const MCWriteLatencyEntry &WLE = + *STI.getWriteLatencyEntry(&SCDesc, Index); + // Conservatively default to MaxLatency. + Write.Latency = + WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles); + Write.SClassOrWriteResourceID = WLE.WriteResourceID; + } else { + // Assign a default latency for this write. + Write.Latency = ID.MaxLatency; + Write.SClassOrWriteResourceID = 0; + } + + Write.IsOptionalDef = false; + assert(Write.RegisterID != 0 && "Expected a valid phys register!"); + LLVM_DEBUG({ + dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex + << ", PhysReg=" << MRI.getName(Write.RegisterID) + << ", Latency=" << Write.Latency + << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; + }); + } + + if (MCDesc.hasOptionalDef()) { + WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs]; + Write.OpIndex = MCDesc.getNumOperands() - 1; + // Assign a default latency for this write. + Write.Latency = ID.MaxLatency; + Write.SClassOrWriteResourceID = 0; + Write.IsOptionalDef = true; + LLVM_DEBUG({ + dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex + << ", Latency=" << Write.Latency + << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; + }); + } + + if (!NumVariadicOps) + return; + + // FIXME: if an instruction opcode is flagged 'mayStore', and it has no + // "unmodeledSideEffects', then this logic optimistically assumes that any + // extra register operands in the variadic sequence is not a register + // definition. + // + // Otherwise, we conservatively assume that any register operand from the + // variadic sequence is both a register read and a register write. + bool AssumeUsesOnly = MCDesc.mayStore() && !MCDesc.mayLoad() && + !MCDesc.hasUnmodeledSideEffects(); + CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef(); + for (unsigned I = 0, OpIndex = MCDesc.getNumOperands(); + I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) { + const MCOperand &Op = MCI.getOperand(OpIndex); + if (!Op.isReg()) + continue; + + WriteDescriptor &Write = ID.Writes[CurrentDef]; + Write.OpIndex = OpIndex; + // Assign a default latency for this write. + Write.Latency = ID.MaxLatency; + Write.SClassOrWriteResourceID = 0; + Write.IsOptionalDef = false; + ++CurrentDef; + LLVM_DEBUG({ + dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex + << ", Latency=" << Write.Latency + << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; + }); + } + + ID.Writes.resize(CurrentDef); +} + +void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI, + unsigned SchedClassID) { + const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode()); + unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs(); + unsigned NumImplicitUses = MCDesc.getNumImplicitUses(); + // Remove the optional definition. + if (MCDesc.hasOptionalDef()) + --NumExplicitUses; + unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands(); + unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps; + ID.Reads.resize(TotalUses); + unsigned CurrentUse = 0; + for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses; + ++I, ++OpIndex) { + const MCOperand &Op = MCI.getOperand(OpIndex); + if (!Op.isReg()) + continue; + + ReadDescriptor &Read = ID.Reads[CurrentUse]; + Read.OpIndex = OpIndex; + Read.UseIndex = I; + Read.SchedClassID = SchedClassID; + ++CurrentUse; + LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex + << ", UseIndex=" << Read.UseIndex << '\n'); + } + + // For the purpose of ReadAdvance, implicit uses come directly after explicit + // uses. The "UseIndex" must be updated according to that implicit layout. + for (unsigned I = 0; I < NumImplicitUses; ++I) { + ReadDescriptor &Read = ID.Reads[CurrentUse + I]; + Read.OpIndex = ~I; + Read.UseIndex = NumExplicitUses + I; + Read.RegisterID = MCDesc.getImplicitUses()[I]; + Read.SchedClassID = SchedClassID; + LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex + << ", UseIndex=" << Read.UseIndex << ", RegisterID=" + << MRI.getName(Read.RegisterID) << '\n'); + } + + CurrentUse += NumImplicitUses; + + // FIXME: If an instruction opcode is marked as 'mayLoad', and it has no + // "unmodeledSideEffects", then this logic optimistically assumes that any + // extra register operands in the variadic sequence are not register + // definition. + + bool AssumeDefsOnly = !MCDesc.mayStore() && MCDesc.mayLoad() && + !MCDesc.hasUnmodeledSideEffects(); + for (unsigned I = 0, OpIndex = MCDesc.getNumOperands(); + I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) { + const MCOperand &Op = MCI.getOperand(OpIndex); + if (!Op.isReg()) + continue; + + ReadDescriptor &Read = ID.Reads[CurrentUse]; + Read.OpIndex = OpIndex; + Read.UseIndex = NumExplicitUses + NumImplicitUses + I; + Read.SchedClassID = SchedClassID; + ++CurrentUse; + LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex + << ", UseIndex=" << Read.UseIndex << '\n'); + } + + ID.Reads.resize(CurrentUse); +} + +Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID, + const MCInst &MCI) const { + if (ID.NumMicroOps != 0) + return ErrorSuccess(); + + bool UsesMemory = ID.MayLoad || ID.MayStore; + bool UsesBuffers = !ID.Buffers.empty(); + bool UsesResources = !ID.Resources.empty(); + if (!UsesMemory && !UsesBuffers && !UsesResources) + return ErrorSuccess(); + + StringRef Message; + if (UsesMemory) { + Message = "found an inconsistent instruction that decodes " + "into zero opcodes and that consumes load/store " + "unit resources."; + } else { + Message = "found an inconsistent instruction that decodes " + "to zero opcodes and that consumes scheduler " + "resources."; + } + + return make_error<InstructionError<MCInst>>(Message, MCI); +} + +Expected<const InstrDesc &> +InstrBuilder::createInstrDescImpl(const MCInst &MCI) { + assert(STI.getSchedModel().hasInstrSchedModel() && + "Itineraries are not yet supported!"); + + // Obtain the instruction descriptor from the opcode. + unsigned short Opcode = MCI.getOpcode(); + const MCInstrDesc &MCDesc = MCII.get(Opcode); + const MCSchedModel &SM = STI.getSchedModel(); + + // Then obtain the scheduling class information from the instruction. + unsigned SchedClassID = MCDesc.getSchedClass(); + bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant(); + + // Try to solve variant scheduling classes. + if (IsVariant) { + unsigned CPUID = SM.getProcessorID(); + while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant()) + SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &MCI, CPUID); + + if (!SchedClassID) { + return make_error<InstructionError<MCInst>>( + "unable to resolve scheduling class for write variant.", MCI); + } + } + + // Check if this instruction is supported. Otherwise, report an error. + const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID); + if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) { + return make_error<InstructionError<MCInst>>( + "found an unsupported instruction in the input assembly sequence.", + MCI); + } + + LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n'); + LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n'); + + // Create a new empty descriptor. + std::unique_ptr<InstrDesc> ID = llvm::make_unique<InstrDesc>(); + ID->NumMicroOps = SCDesc.NumMicroOps; + ID->SchedClassID = SchedClassID; + + if (MCDesc.isCall() && FirstCallInst) { + // We don't correctly model calls. + WithColor::warning() << "found a call in the input assembly sequence.\n"; + WithColor::note() << "call instructions are not correctly modeled. " + << "Assume a latency of 100cy.\n"; + FirstCallInst = false; + } + + if (MCDesc.isReturn() && FirstReturnInst) { + WithColor::warning() << "found a return instruction in the input" + << " assembly sequence.\n"; + WithColor::note() << "program counter updates are ignored.\n"; + FirstReturnInst = false; + } + + ID->MayLoad = MCDesc.mayLoad(); + ID->MayStore = MCDesc.mayStore(); + ID->HasSideEffects = MCDesc.hasUnmodeledSideEffects(); + ID->BeginGroup = SCDesc.BeginGroup; + ID->EndGroup = SCDesc.EndGroup; + + initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks); + computeMaxLatency(*ID, MCDesc, SCDesc, STI); + + if (Error Err = verifyOperands(MCDesc, MCI)) + return std::move(Err); + + populateWrites(*ID, MCI, SchedClassID); + populateReads(*ID, MCI, SchedClassID); + + LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n'); + LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n'); + + // Sanity check on the instruction descriptor. + if (Error Err = verifyInstrDesc(*ID, MCI)) + return std::move(Err); + + // Now add the new descriptor. + bool IsVariadic = MCDesc.isVariadic(); + if (!IsVariadic && !IsVariant) { + Descriptors[MCI.getOpcode()] = std::move(ID); + return *Descriptors[MCI.getOpcode()]; + } + + VariantDescriptors[&MCI] = std::move(ID); + return *VariantDescriptors[&MCI]; +} + +Expected<const InstrDesc &> +InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI) { + if (Descriptors.find_as(MCI.getOpcode()) != Descriptors.end()) + return *Descriptors[MCI.getOpcode()]; + + if (VariantDescriptors.find(&MCI) != VariantDescriptors.end()) + return *VariantDescriptors[&MCI]; + + return createInstrDescImpl(MCI); +} + +Expected<std::unique_ptr<Instruction>> +InstrBuilder::createInstruction(const MCInst &MCI) { + Expected<const InstrDesc &> DescOrErr = getOrCreateInstrDesc(MCI); + if (!DescOrErr) + return DescOrErr.takeError(); + const InstrDesc &D = *DescOrErr; + std::unique_ptr<Instruction> NewIS = llvm::make_unique<Instruction>(D); + + // Check if this is a dependency breaking instruction. + APInt Mask; + + bool IsZeroIdiom = false; + bool IsDepBreaking = false; + if (MCIA) { + unsigned ProcID = STI.getSchedModel().getProcessorID(); + IsZeroIdiom = MCIA->isZeroIdiom(MCI, Mask, ProcID); + IsDepBreaking = + IsZeroIdiom || MCIA->isDependencyBreaking(MCI, Mask, ProcID); + if (MCIA->isOptimizableRegisterMove(MCI, ProcID)) + NewIS->setOptimizableMove(); + } + + // Initialize Reads first. + for (const ReadDescriptor &RD : D.Reads) { + int RegID = -1; + if (!RD.isImplicitRead()) { + // explicit read. + const MCOperand &Op = MCI.getOperand(RD.OpIndex); + // Skip non-register operands. + if (!Op.isReg()) + continue; + RegID = Op.getReg(); + } else { + // Implicit read. + RegID = RD.RegisterID; + } + + // Skip invalid register operands. + if (!RegID) + continue; + + // Okay, this is a register operand. Create a ReadState for it. + assert(RegID > 0 && "Invalid register ID found!"); + NewIS->getUses().emplace_back(RD, RegID); + ReadState &RS = NewIS->getUses().back(); + + if (IsDepBreaking) { + // A mask of all zeroes means: explicit input operands are not + // independent. + if (Mask.isNullValue()) { + if (!RD.isImplicitRead()) + RS.setIndependentFromDef(); + } else { + // Check if this register operand is independent according to `Mask`. + // Note that Mask may not have enough bits to describe all explicit and + // implicit input operands. If this register operand doesn't have a + // corresponding bit in Mask, then conservatively assume that it is + // dependent. + if (Mask.getBitWidth() > RD.UseIndex) { + // Okay. This map describe register use `RD.UseIndex`. + if (Mask[RD.UseIndex]) + RS.setIndependentFromDef(); + } + } + } + } + + // Early exit if there are no writes. + if (D.Writes.empty()) + return std::move(NewIS); + + // Track register writes that implicitly clear the upper portion of the + // underlying super-registers using an APInt. + APInt WriteMask(D.Writes.size(), 0); + + // Now query the MCInstrAnalysis object to obtain information about which + // register writes implicitly clear the upper portion of a super-register. + if (MCIA) + MCIA->clearsSuperRegisters(MRI, MCI, WriteMask); + + // Initialize writes. + unsigned WriteIndex = 0; + for (const WriteDescriptor &WD : D.Writes) { + unsigned RegID = WD.isImplicitWrite() ? WD.RegisterID + : MCI.getOperand(WD.OpIndex).getReg(); + // Check if this is a optional definition that references NoReg. + if (WD.IsOptionalDef && !RegID) { + ++WriteIndex; + continue; + } + + assert(RegID && "Expected a valid register ID!"); + NewIS->getDefs().emplace_back(WD, RegID, + /* ClearsSuperRegs */ WriteMask[WriteIndex], + /* WritesZero */ IsZeroIdiom); + ++WriteIndex; + } + + return std::move(NewIS); +} +} // namespace mca +} // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/MCA/Instruction.cpp b/contrib/llvm-project/llvm/lib/MCA/Instruction.cpp new file mode 100644 index 000000000000..001842bca318 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/MCA/Instruction.cpp @@ -0,0 +1,254 @@ +//===--------------------- Instruction.cpp ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines abstractions used by the Pipeline to model register reads, +// register writes and instructions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/Instruction.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace mca { + +void WriteState::writeStartEvent(unsigned IID, unsigned RegID, + unsigned Cycles) { + CRD.IID = IID; + CRD.RegID = RegID; + CRD.Cycles = Cycles; + DependentWriteCyclesLeft = Cycles; + DependentWrite = nullptr; +} + +void ReadState::writeStartEvent(unsigned IID, unsigned RegID, unsigned Cycles) { + assert(DependentWrites); + assert(CyclesLeft == UNKNOWN_CYCLES); + + // This read may be dependent on more than one write. This typically occurs + // when a definition is the result of multiple writes where at least one + // write does a partial register update. + // The HW is forced to do some extra bookkeeping to track of all the + // dependent writes, and implement a merging scheme for the partial writes. + --DependentWrites; + if (TotalCycles < Cycles) { + CRD.IID = IID; + CRD.RegID = RegID; + CRD.Cycles = Cycles; + TotalCycles = Cycles; + } + + if (!DependentWrites) { + CyclesLeft = TotalCycles; + IsReady = !CyclesLeft; + } +} + +void WriteState::onInstructionIssued(unsigned IID) { + assert(CyclesLeft == UNKNOWN_CYCLES); + // Update the number of cycles left based on the WriteDescriptor info. + CyclesLeft = getLatency(); + + // Now that the time left before write-back is known, notify + // all the users. + for (const std::pair<ReadState *, int> &User : Users) { + ReadState *RS = User.first; + unsigned ReadCycles = std::max(0, CyclesLeft - User.second); + RS->writeStartEvent(IID, RegisterID, ReadCycles); + } + + // Notify any writes that are in a false dependency with this write. + if (PartialWrite) + PartialWrite->writeStartEvent(IID, RegisterID, CyclesLeft); +} + +void WriteState::addUser(unsigned IID, ReadState *User, int ReadAdvance) { + // If CyclesLeft is different than -1, then we don't need to + // update the list of users. We can just notify the user with + // the actual number of cycles left (which may be zero). + if (CyclesLeft != UNKNOWN_CYCLES) { + unsigned ReadCycles = std::max(0, CyclesLeft - ReadAdvance); + User->writeStartEvent(IID, RegisterID, ReadCycles); + return; + } + + Users.emplace_back(User, ReadAdvance); +} + +void WriteState::addUser(unsigned IID, WriteState *User) { + if (CyclesLeft != UNKNOWN_CYCLES) { + User->writeStartEvent(IID, RegisterID, std::max(0, CyclesLeft)); + return; + } + + assert(!PartialWrite && "PartialWrite already set!"); + PartialWrite = User; + User->setDependentWrite(this); +} + +void WriteState::cycleEvent() { + // Note: CyclesLeft can be a negative number. It is an error to + // make it an unsigned quantity because users of this write may + // specify a negative ReadAdvance. + if (CyclesLeft != UNKNOWN_CYCLES) + CyclesLeft--; + + if (DependentWriteCyclesLeft) + DependentWriteCyclesLeft--; +} + +void ReadState::cycleEvent() { + // Update the total number of cycles. + if (DependentWrites && TotalCycles) { + --TotalCycles; + return; + } + + // Bail out immediately if we don't know how many cycles are left. + if (CyclesLeft == UNKNOWN_CYCLES) + return; + + if (CyclesLeft) { + --CyclesLeft; + IsReady = !CyclesLeft; + } +} + +#ifndef NDEBUG +void WriteState::dump() const { + dbgs() << "{ OpIdx=" << WD->OpIndex << ", Lat=" << getLatency() << ", RegID " + << getRegisterID() << ", Cycles Left=" << getCyclesLeft() << " }"; +} + +void WriteRef::dump() const { + dbgs() << "IID=" << getSourceIndex() << ' '; + if (isValid()) + getWriteState()->dump(); + else + dbgs() << "(null)"; +} +#endif + +const CriticalDependency &Instruction::computeCriticalRegDep() { + if (CriticalRegDep.Cycles) + return CriticalRegDep; + + unsigned MaxLatency = 0; + for (const WriteState &WS : getDefs()) { + const CriticalDependency &WriteCRD = WS.getCriticalRegDep(); + if (WriteCRD.Cycles > MaxLatency) + CriticalRegDep = WriteCRD; + } + + for (const ReadState &RS : getUses()) { + const CriticalDependency &ReadCRD = RS.getCriticalRegDep(); + if (ReadCRD.Cycles > MaxLatency) + CriticalRegDep = ReadCRD; + } + + return CriticalRegDep; +} + +void Instruction::dispatch(unsigned RCUToken) { + assert(Stage == IS_INVALID); + Stage = IS_DISPATCHED; + RCUTokenID = RCUToken; + + // Check if input operands are already available. + if (updateDispatched()) + updatePending(); +} + +void Instruction::execute(unsigned IID) { + assert(Stage == IS_READY); + Stage = IS_EXECUTING; + + // Set the cycles left before the write-back stage. + CyclesLeft = getLatency(); + + for (WriteState &WS : getDefs()) + WS.onInstructionIssued(IID); + + // Transition to the "executed" stage if this is a zero-latency instruction. + if (!CyclesLeft) + Stage = IS_EXECUTED; +} + +void Instruction::forceExecuted() { + assert(Stage == IS_READY && "Invalid internal state!"); + CyclesLeft = 0; + Stage = IS_EXECUTED; +} + +bool Instruction::updatePending() { + assert(isPending() && "Unexpected instruction stage found!"); + + if (!all_of(getUses(), [](const ReadState &Use) { return Use.isReady(); })) + return false; + + // A partial register write cannot complete before a dependent write. + if (!all_of(getDefs(), [](const WriteState &Def) { return Def.isReady(); })) + return false; + + Stage = IS_READY; + return true; +} + +bool Instruction::updateDispatched() { + assert(isDispatched() && "Unexpected instruction stage found!"); + + if (!all_of(getUses(), [](const ReadState &Use) { + return Use.isPending() || Use.isReady(); + })) + return false; + + // A partial register write cannot complete before a dependent write. + if (!all_of(getDefs(), + [](const WriteState &Def) { return !Def.getDependentWrite(); })) + return false; + + Stage = IS_PENDING; + return true; +} + +void Instruction::update() { + if (isDispatched()) + updateDispatched(); + if (isPending()) + updatePending(); +} + +void Instruction::cycleEvent() { + if (isReady()) + return; + + if (isDispatched() || isPending()) { + for (ReadState &Use : getUses()) + Use.cycleEvent(); + + for (WriteState &Def : getDefs()) + Def.cycleEvent(); + + update(); + return; + } + + assert(isExecuting() && "Instruction not in-flight?"); + assert(CyclesLeft && "Instruction already executed?"); + for (WriteState &Def : getDefs()) + Def.cycleEvent(); + CyclesLeft--; + if (!CyclesLeft) + Stage = IS_EXECUTED; +} + +const unsigned WriteRef::INVALID_IID = std::numeric_limits<unsigned>::max(); + +} // namespace mca +} // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/MCA/Pipeline.cpp b/contrib/llvm-project/llvm/lib/MCA/Pipeline.cpp new file mode 100644 index 000000000000..22b9d0799f77 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/MCA/Pipeline.cpp @@ -0,0 +1,97 @@ +//===--------------------- Pipeline.cpp -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements an ordered container of stages that simulate the +/// pipeline of a hardware backend. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/Pipeline.h" +#include "llvm/MCA/HWEventListener.h" +#include "llvm/Support/Debug.h" + +namespace llvm { +namespace mca { + +#define DEBUG_TYPE "llvm-mca" + +void Pipeline::addEventListener(HWEventListener *Listener) { + if (Listener) + Listeners.insert(Listener); + for (auto &S : Stages) + S->addListener(Listener); +} + +bool Pipeline::hasWorkToProcess() { + return any_of(Stages, [](const std::unique_ptr<Stage> &S) { + return S->hasWorkToComplete(); + }); +} + +Expected<unsigned> Pipeline::run() { + assert(!Stages.empty() && "Unexpected empty pipeline found!"); + + do { + notifyCycleBegin(); + if (Error Err = runCycle()) + return std::move(Err); + notifyCycleEnd(); + ++Cycles; + } while (hasWorkToProcess()); + + return Cycles; +} + +Error Pipeline::runCycle() { + Error Err = ErrorSuccess(); + // Update stages before we start processing new instructions. + for (auto I = Stages.rbegin(), E = Stages.rend(); I != E && !Err; ++I) { + const std::unique_ptr<Stage> &S = *I; + Err = S->cycleStart(); + } + + // Now fetch and execute new instructions. + InstRef IR; + Stage &FirstStage = *Stages[0]; + while (!Err && FirstStage.isAvailable(IR)) + Err = FirstStage.execute(IR); + + // Update stages in preparation for a new cycle. + for (const std::unique_ptr<Stage> &S : Stages) { + Err = S->cycleEnd(); + if (Err) + break; + } + + return Err; +} + +void Pipeline::appendStage(std::unique_ptr<Stage> S) { + assert(S && "Invalid null stage in input!"); + if (!Stages.empty()) { + Stage *Last = Stages.back().get(); + Last->setNextInSequence(S.get()); + } + + Stages.push_back(std::move(S)); +} + +void Pipeline::notifyCycleBegin() { + LLVM_DEBUG(dbgs() << "\n[E] Cycle begin: " << Cycles << '\n'); + for (HWEventListener *Listener : Listeners) + Listener->onCycleBegin(); +} + +void Pipeline::notifyCycleEnd() { + LLVM_DEBUG(dbgs() << "[E] Cycle end: " << Cycles << "\n"); + for (HWEventListener *Listener : Listeners) + Listener->onCycleEnd(); +} +} // namespace mca. +} // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/MCA/Stages/DispatchStage.cpp b/contrib/llvm-project/llvm/lib/MCA/Stages/DispatchStage.cpp new file mode 100644 index 000000000000..7334a268e9a6 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/MCA/Stages/DispatchStage.cpp @@ -0,0 +1,184 @@ +//===--------------------- DispatchStage.cpp --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file models the dispatch component of an instruction pipeline. +/// +/// The DispatchStage is responsible for updating instruction dependencies +/// and communicating to the simulated instruction scheduler that an instruction +/// is ready to be scheduled for execution. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/Stages/DispatchStage.h" +#include "llvm/MCA/HWEventListener.h" +#include "llvm/MCA/HardwareUnits/Scheduler.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "llvm-mca" + +namespace llvm { +namespace mca { + +DispatchStage::DispatchStage(const MCSubtargetInfo &Subtarget, + const MCRegisterInfo &MRI, + unsigned MaxDispatchWidth, RetireControlUnit &R, + RegisterFile &F) + : DispatchWidth(MaxDispatchWidth), AvailableEntries(MaxDispatchWidth), + CarryOver(0U), CarriedOver(), STI(Subtarget), RCU(R), PRF(F) { + if (!DispatchWidth) + DispatchWidth = Subtarget.getSchedModel().IssueWidth; +} + +void DispatchStage::notifyInstructionDispatched(const InstRef &IR, + ArrayRef<unsigned> UsedRegs, + unsigned UOps) const { + LLVM_DEBUG(dbgs() << "[E] Instruction Dispatched: #" << IR << '\n'); + notifyEvent<HWInstructionEvent>( + HWInstructionDispatchedEvent(IR, UsedRegs, UOps)); +} + +bool DispatchStage::checkPRF(const InstRef &IR) const { + SmallVector<unsigned, 4> RegDefs; + for (const WriteState &RegDef : IR.getInstruction()->getDefs()) + RegDefs.emplace_back(RegDef.getRegisterID()); + + const unsigned RegisterMask = PRF.isAvailable(RegDefs); + // A mask with all zeroes means: register files are available. + if (RegisterMask) { + notifyEvent<HWStallEvent>( + HWStallEvent(HWStallEvent::RegisterFileStall, IR)); + return false; + } + + return true; +} + +bool DispatchStage::checkRCU(const InstRef &IR) const { + const unsigned NumMicroOps = IR.getInstruction()->getDesc().NumMicroOps; + if (RCU.isAvailable(NumMicroOps)) + return true; + notifyEvent<HWStallEvent>( + HWStallEvent(HWStallEvent::RetireControlUnitStall, IR)); + return false; +} + +bool DispatchStage::canDispatch(const InstRef &IR) const { + bool CanDispatch = checkRCU(IR); + CanDispatch &= checkPRF(IR); + CanDispatch &= checkNextStage(IR); + return CanDispatch; +} + +Error DispatchStage::dispatch(InstRef IR) { + assert(!CarryOver && "Cannot dispatch another instruction!"); + Instruction &IS = *IR.getInstruction(); + const InstrDesc &Desc = IS.getDesc(); + const unsigned NumMicroOps = Desc.NumMicroOps; + if (NumMicroOps > DispatchWidth) { + assert(AvailableEntries == DispatchWidth); + AvailableEntries = 0; + CarryOver = NumMicroOps - DispatchWidth; + CarriedOver = IR; + } else { + assert(AvailableEntries >= NumMicroOps); + AvailableEntries -= NumMicroOps; + } + + // Check if this instructions ends the dispatch group. + if (Desc.EndGroup) + AvailableEntries = 0; + + // Check if this is an optimizable reg-reg move. + if (IS.isOptimizableMove()) { + assert(IS.getDefs().size() == 1 && "Expected a single input!"); + assert(IS.getUses().size() == 1 && "Expected a single output!"); + if (PRF.tryEliminateMove(IS.getDefs()[0], IS.getUses()[0])) + IS.setEliminated(); + } + + // A dependency-breaking instruction doesn't have to wait on the register + // input operands, and it is often optimized at register renaming stage. + // Update RAW dependencies if this instruction is not a dependency-breaking + // instruction. A dependency-breaking instruction is a zero-latency + // instruction that doesn't consume hardware resources. + // An example of dependency-breaking instruction on X86 is a zero-idiom XOR. + // + // We also don't update data dependencies for instructions that have been + // eliminated at register renaming stage. + if (!IS.isEliminated()) { + for (ReadState &RS : IS.getUses()) + PRF.addRegisterRead(RS, STI); + } + + // By default, a dependency-breaking zero-idiom is expected to be optimized + // at register renaming stage. That means, no physical register is allocated + // to the instruction. + SmallVector<unsigned, 4> RegisterFiles(PRF.getNumRegisterFiles()); + for (WriteState &WS : IS.getDefs()) + PRF.addRegisterWrite(WriteRef(IR.getSourceIndex(), &WS), RegisterFiles); + + // Reserve slots in the RCU, and notify the instruction that it has been + // dispatched to the schedulers for execution. + IS.dispatch(RCU.reserveSlot(IR, NumMicroOps)); + + // Notify listeners of the "instruction dispatched" event, + // and move IR to the next stage. + notifyInstructionDispatched(IR, RegisterFiles, + std::min(DispatchWidth, NumMicroOps)); + return moveToTheNextStage(IR); +} + +Error DispatchStage::cycleStart() { + PRF.cycleStart(); + + if (!CarryOver) { + AvailableEntries = DispatchWidth; + return ErrorSuccess(); + } + + AvailableEntries = CarryOver >= DispatchWidth ? 0 : DispatchWidth - CarryOver; + unsigned DispatchedOpcodes = DispatchWidth - AvailableEntries; + CarryOver -= DispatchedOpcodes; + assert(CarriedOver && "Invalid dispatched instruction"); + + SmallVector<unsigned, 8> RegisterFiles(PRF.getNumRegisterFiles(), 0U); + notifyInstructionDispatched(CarriedOver, RegisterFiles, DispatchedOpcodes); + if (!CarryOver) + CarriedOver = InstRef(); + return ErrorSuccess(); +} + +bool DispatchStage::isAvailable(const InstRef &IR) const { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + unsigned Required = std::min(Desc.NumMicroOps, DispatchWidth); + if (Required > AvailableEntries) + return false; + + if (Desc.BeginGroup && AvailableEntries != DispatchWidth) + return false; + + // The dispatch logic doesn't internally buffer instructions. It only accepts + // instructions that can be successfully moved to the next stage during this + // same cycle. + return canDispatch(IR); +} + +Error DispatchStage::execute(InstRef &IR) { + assert(canDispatch(IR) && "Cannot dispatch another instruction!"); + return dispatch(IR); +} + +#ifndef NDEBUG +void DispatchStage::dump() const { + PRF.dump(); + RCU.dump(); +} +#endif +} // namespace mca +} // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/MCA/Stages/EntryStage.cpp b/contrib/llvm-project/llvm/lib/MCA/Stages/EntryStage.cpp new file mode 100644 index 000000000000..d2f5613a0fb6 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/MCA/Stages/EntryStage.cpp @@ -0,0 +1,77 @@ +//===---------------------- EntryStage.cpp ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines the Fetch stage of an instruction pipeline. Its sole +/// purpose in life is to produce instructions for the rest of the pipeline. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/Stages/EntryStage.h" +#include "llvm/MCA/Instruction.h" + +namespace llvm { +namespace mca { + +bool EntryStage::hasWorkToComplete() const { + return static_cast<bool>(CurrentInstruction); +} + +bool EntryStage::isAvailable(const InstRef & /* unused */) const { + if (CurrentInstruction) + return checkNextStage(CurrentInstruction); + return false; +} + +void EntryStage::getNextInstruction() { + assert(!CurrentInstruction && "There is already an instruction to process!"); + if (!SM.hasNext()) + return; + SourceRef SR = SM.peekNext(); + std::unique_ptr<Instruction> Inst = llvm::make_unique<Instruction>(SR.second); + CurrentInstruction = InstRef(SR.first, Inst.get()); + Instructions.emplace_back(std::move(Inst)); + SM.updateNext(); +} + +llvm::Error EntryStage::execute(InstRef & /*unused */) { + assert(CurrentInstruction && "There is no instruction to process!"); + if (llvm::Error Val = moveToTheNextStage(CurrentInstruction)) + return Val; + + // Move the program counter. + CurrentInstruction.invalidate(); + getNextInstruction(); + return llvm::ErrorSuccess(); +} + +llvm::Error EntryStage::cycleStart() { + if (!CurrentInstruction) + getNextInstruction(); + return llvm::ErrorSuccess(); +} + +llvm::Error EntryStage::cycleEnd() { + // Find the first instruction which hasn't been retired. + auto Range = make_range(&Instructions[NumRetired], Instructions.end()); + auto It = find_if(Range, [](const std::unique_ptr<Instruction> &I) { + return !I->isRetired(); + }); + + NumRetired = std::distance(Instructions.begin(), It); + // Erase instructions up to the first that hasn't been retired. + if ((NumRetired * 2) >= Instructions.size()) { + Instructions.erase(Instructions.begin(), It); + NumRetired = 0; + } + + return llvm::ErrorSuccess(); +} + +} // namespace mca +} // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/MCA/Stages/ExecuteStage.cpp b/contrib/llvm-project/llvm/lib/MCA/Stages/ExecuteStage.cpp new file mode 100644 index 000000000000..a2b361fcd1bf --- /dev/null +++ b/contrib/llvm-project/llvm/lib/MCA/Stages/ExecuteStage.cpp @@ -0,0 +1,290 @@ +//===---------------------- ExecuteStage.cpp --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines the execution stage of an instruction pipeline. +/// +/// The ExecuteStage is responsible for managing the hardware scheduler +/// and issuing notifications that an instruction has been executed. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/Stages/ExecuteStage.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "llvm-mca" + +namespace llvm { +namespace mca { + +HWStallEvent::GenericEventType toHWStallEventType(Scheduler::Status Status) { + switch (Status) { + case Scheduler::SC_LOAD_QUEUE_FULL: + return HWStallEvent::LoadQueueFull; + case Scheduler::SC_STORE_QUEUE_FULL: + return HWStallEvent::StoreQueueFull; + case Scheduler::SC_BUFFERS_FULL: + return HWStallEvent::SchedulerQueueFull; + case Scheduler::SC_DISPATCH_GROUP_STALL: + return HWStallEvent::DispatchGroupStall; + case Scheduler::SC_AVAILABLE: + return HWStallEvent::Invalid; + } + + llvm_unreachable("Don't know how to process this StallKind!"); +} + +bool ExecuteStage::isAvailable(const InstRef &IR) const { + if (Scheduler::Status S = HWS.isAvailable(IR)) { + HWStallEvent::GenericEventType ET = toHWStallEventType(S); + notifyEvent<HWStallEvent>(HWStallEvent(ET, IR)); + return false; + } + + return true; +} + +Error ExecuteStage::issueInstruction(InstRef &IR) { + SmallVector<std::pair<ResourceRef, ResourceCycles>, 4> Used; + SmallVector<InstRef, 4> Pending; + SmallVector<InstRef, 4> Ready; + + HWS.issueInstruction(IR, Used, Pending, Ready); + NumIssuedOpcodes += IR.getInstruction()->getDesc().NumMicroOps; + + notifyReservedOrReleasedBuffers(IR, /* Reserved */ false); + + notifyInstructionIssued(IR, Used); + if (IR.getInstruction()->isExecuted()) { + notifyInstructionExecuted(IR); + // FIXME: add a buffer of executed instructions. + if (Error S = moveToTheNextStage(IR)) + return S; + } + + for (const InstRef &I : Pending) + notifyInstructionPending(I); + + for (const InstRef &I : Ready) + notifyInstructionReady(I); + return ErrorSuccess(); +} + +Error ExecuteStage::issueReadyInstructions() { + InstRef IR = HWS.select(); + while (IR) { + if (Error Err = issueInstruction(IR)) + return Err; + + // Select the next instruction to issue. + IR = HWS.select(); + } + + return ErrorSuccess(); +} + +Error ExecuteStage::cycleStart() { + SmallVector<ResourceRef, 8> Freed; + SmallVector<InstRef, 4> Executed; + SmallVector<InstRef, 4> Pending; + SmallVector<InstRef, 4> Ready; + + HWS.cycleEvent(Freed, Executed, Pending, Ready); + NumDispatchedOpcodes = 0; + NumIssuedOpcodes = 0; + + for (const ResourceRef &RR : Freed) + notifyResourceAvailable(RR); + + for (InstRef &IR : Executed) { + notifyInstructionExecuted(IR); + // FIXME: add a buffer of executed instructions. + if (Error S = moveToTheNextStage(IR)) + return S; + } + + for (const InstRef &IR : Pending) + notifyInstructionPending(IR); + + for (const InstRef &IR : Ready) + notifyInstructionReady(IR); + + return issueReadyInstructions(); +} + +Error ExecuteStage::cycleEnd() { + if (!EnablePressureEvents) + return ErrorSuccess(); + + // Always conservatively report any backpressure events if the dispatch logic + // was stalled due to unavailable scheduler resources. + if (!HWS.hadTokenStall() && NumDispatchedOpcodes <= NumIssuedOpcodes) + return ErrorSuccess(); + + SmallVector<InstRef, 8> Insts; + uint64_t Mask = HWS.analyzeResourcePressure(Insts); + if (Mask) { + LLVM_DEBUG(dbgs() << "[E] Backpressure increased because of unavailable " + "pipeline resources: " + << format_hex(Mask, 16) << '\n'); + HWPressureEvent Ev(HWPressureEvent::RESOURCES, Insts, Mask); + notifyEvent(Ev); + } + + SmallVector<InstRef, 8> RegDeps; + SmallVector<InstRef, 8> MemDeps; + HWS.analyzeDataDependencies(RegDeps, MemDeps); + if (RegDeps.size()) { + LLVM_DEBUG( + dbgs() << "[E] Backpressure increased by register dependencies\n"); + HWPressureEvent Ev(HWPressureEvent::REGISTER_DEPS, RegDeps); + notifyEvent(Ev); + } + + if (MemDeps.size()) { + LLVM_DEBUG(dbgs() << "[E] Backpressure increased by memory dependencies\n"); + HWPressureEvent Ev(HWPressureEvent::MEMORY_DEPS, MemDeps); + notifyEvent(Ev); + } + + return ErrorSuccess(); +} + +#ifndef NDEBUG +static void verifyInstructionEliminated(const InstRef &IR) { + const Instruction &Inst = *IR.getInstruction(); + assert(Inst.isEliminated() && "Instruction was not eliminated!"); + assert(Inst.isReady() && "Instruction in an inconsistent state!"); + + // Ensure that instructions eliminated at register renaming stage are in a + // consistent state. + const InstrDesc &Desc = Inst.getDesc(); + assert(!Desc.MayLoad && !Desc.MayStore && "Cannot eliminate a memory op!"); +} +#endif + +Error ExecuteStage::handleInstructionEliminated(InstRef &IR) { +#ifndef NDEBUG + verifyInstructionEliminated(IR); +#endif + notifyInstructionPending(IR); + notifyInstructionReady(IR); + notifyInstructionIssued(IR, {}); + IR.getInstruction()->forceExecuted(); + notifyInstructionExecuted(IR); + return moveToTheNextStage(IR); +} + +// Schedule the instruction for execution on the hardware. +Error ExecuteStage::execute(InstRef &IR) { + assert(isAvailable(IR) && "Scheduler is not available!"); + +#ifndef NDEBUG + // Ensure that the HWS has not stored this instruction in its queues. + HWS.sanityCheck(IR); +#endif + + if (IR.getInstruction()->isEliminated()) + return handleInstructionEliminated(IR); + + // Reserve a slot in each buffered resource. Also, mark units with + // BufferSize=0 as reserved. Resources with a buffer size of zero will only + // be released after MCIS is issued, and all the ResourceCycles for those + // units have been consumed. + bool IsReadyInstruction = HWS.dispatch(IR); + const Instruction &Inst = *IR.getInstruction(); + NumDispatchedOpcodes += Inst.getDesc().NumMicroOps; + notifyReservedOrReleasedBuffers(IR, /* Reserved */ true); + + if (!IsReadyInstruction) { + if (Inst.isPending()) + notifyInstructionPending(IR); + return ErrorSuccess(); + } + + notifyInstructionPending(IR); + + // If we did not return early, then the scheduler is ready for execution. + notifyInstructionReady(IR); + + // If we cannot issue immediately, the HWS will add IR to its ready queue for + // execution later, so we must return early here. + if (!HWS.mustIssueImmediately(IR)) + return ErrorSuccess(); + + // Issue IR to the underlying pipelines. + return issueInstruction(IR); +} + +void ExecuteStage::notifyInstructionExecuted(const InstRef &IR) const { + LLVM_DEBUG(dbgs() << "[E] Instruction Executed: #" << IR << '\n'); + notifyEvent<HWInstructionEvent>( + HWInstructionEvent(HWInstructionEvent::Executed, IR)); +} + +void ExecuteStage::notifyInstructionPending(const InstRef &IR) const { + LLVM_DEBUG(dbgs() << "[E] Instruction Pending: #" << IR << '\n'); + notifyEvent<HWInstructionEvent>( + HWInstructionEvent(HWInstructionEvent::Pending, IR)); +} + +void ExecuteStage::notifyInstructionReady(const InstRef &IR) const { + LLVM_DEBUG(dbgs() << "[E] Instruction Ready: #" << IR << '\n'); + notifyEvent<HWInstructionEvent>( + HWInstructionEvent(HWInstructionEvent::Ready, IR)); +} + +void ExecuteStage::notifyResourceAvailable(const ResourceRef &RR) const { + LLVM_DEBUG(dbgs() << "[E] Resource Available: [" << RR.first << '.' + << RR.second << "]\n"); + for (HWEventListener *Listener : getListeners()) + Listener->onResourceAvailable(RR); +} + +void ExecuteStage::notifyInstructionIssued( + const InstRef &IR, + MutableArrayRef<std::pair<ResourceRef, ResourceCycles>> Used) const { + LLVM_DEBUG({ + dbgs() << "[E] Instruction Issued: #" << IR << '\n'; + for (const std::pair<ResourceRef, ResourceCycles> &Resource : Used) { + assert(Resource.second.getDenominator() == 1 && "Invalid cycles!"); + dbgs() << "[E] Resource Used: [" << Resource.first.first << '.' + << Resource.first.second << "], "; + dbgs() << "cycles: " << Resource.second.getNumerator() << '\n'; + } + }); + + // Replace resource masks with valid resource processor IDs. + for (std::pair<ResourceRef, ResourceCycles> &Use : Used) + Use.first.first = HWS.getResourceID(Use.first.first); + + notifyEvent<HWInstructionEvent>(HWInstructionIssuedEvent(IR, Used)); +} + +void ExecuteStage::notifyReservedOrReleasedBuffers(const InstRef &IR, + bool Reserved) const { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + if (Desc.Buffers.empty()) + return; + + SmallVector<unsigned, 4> BufferIDs(Desc.Buffers.begin(), Desc.Buffers.end()); + std::transform(Desc.Buffers.begin(), Desc.Buffers.end(), BufferIDs.begin(), + [&](uint64_t Op) { return HWS.getResourceID(Op); }); + if (Reserved) { + for (HWEventListener *Listener : getListeners()) + Listener->onReservedBuffers(IR, BufferIDs); + return; + } + + for (HWEventListener *Listener : getListeners()) + Listener->onReleasedBuffers(IR, BufferIDs); +} + +} // namespace mca +} // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/MCA/Stages/InstructionTables.cpp b/contrib/llvm-project/llvm/lib/MCA/Stages/InstructionTables.cpp new file mode 100644 index 000000000000..adeefb45ec2d --- /dev/null +++ b/contrib/llvm-project/llvm/lib/MCA/Stages/InstructionTables.cpp @@ -0,0 +1,68 @@ +//===--------------------- InstructionTables.cpp ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the method InstructionTables::execute(). +/// Method execute() prints a theoretical resource pressure distribution based +/// on the information available in the scheduling model, and without running +/// the pipeline. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/Stages/InstructionTables.h" + +namespace llvm { +namespace mca { + +Error InstructionTables::execute(InstRef &IR) { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + UsedResources.clear(); + + // Identify the resources consumed by this instruction. + for (const std::pair<uint64_t, ResourceUsage> Resource : Desc.Resources) { + // Skip zero-cycle resources (i.e., unused resources). + if (!Resource.second.size()) + continue; + unsigned Cycles = Resource.second.size(); + unsigned Index = std::distance( + Masks.begin(), std::find(Masks.begin(), Masks.end(), Resource.first)); + const MCProcResourceDesc &ProcResource = *SM.getProcResource(Index); + unsigned NumUnits = ProcResource.NumUnits; + if (!ProcResource.SubUnitsIdxBegin) { + // The number of cycles consumed by each unit. + for (unsigned I = 0, E = NumUnits; I < E; ++I) { + ResourceRef ResourceUnit = std::make_pair(Index, 1U << I); + UsedResources.emplace_back( + std::make_pair(ResourceUnit, ResourceCycles(Cycles, NumUnits))); + } + continue; + } + + // This is a group. Obtain the set of resources contained in this + // group. Some of these resources may implement multiple units. + // Uniformly distribute Cycles across all of the units. + for (unsigned I1 = 0; I1 < NumUnits; ++I1) { + unsigned SubUnitIdx = ProcResource.SubUnitsIdxBegin[I1]; + const MCProcResourceDesc &SubUnit = *SM.getProcResource(SubUnitIdx); + // Compute the number of cycles consumed by each resource unit. + for (unsigned I2 = 0, E2 = SubUnit.NumUnits; I2 < E2; ++I2) { + ResourceRef ResourceUnit = std::make_pair(SubUnitIdx, 1U << I2); + UsedResources.emplace_back(std::make_pair( + ResourceUnit, ResourceCycles(Cycles, NumUnits * SubUnit.NumUnits))); + } + } + } + + // Send a fake instruction issued event to all the views. + HWInstructionIssuedEvent Event(IR, UsedResources); + notifyEvent<HWInstructionIssuedEvent>(Event); + return ErrorSuccess(); +} + +} // namespace mca +} // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/MCA/Stages/MicroOpQueueStage.cpp b/contrib/llvm-project/llvm/lib/MCA/Stages/MicroOpQueueStage.cpp new file mode 100644 index 000000000000..cb3e4c6979a4 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/MCA/Stages/MicroOpQueueStage.cpp @@ -0,0 +1,70 @@ +//===---------------------- MicroOpQueueStage.cpp ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines the MicroOpQueueStage. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/Stages/MicroOpQueueStage.h" + +namespace llvm { +namespace mca { + +#define DEBUG_TYPE "llvm-mca" + +Error MicroOpQueueStage::moveInstructions() { + InstRef IR = Buffer[CurrentInstructionSlotIdx]; + while (IR && checkNextStage(IR)) { + if (llvm::Error Val = moveToTheNextStage(IR)) + return Val; + + Buffer[CurrentInstructionSlotIdx].invalidate(); + unsigned NormalizedOpcodes = getNormalizedOpcodes(IR); + CurrentInstructionSlotIdx += NormalizedOpcodes; + CurrentInstructionSlotIdx %= Buffer.size(); + AvailableEntries += NormalizedOpcodes; + IR = Buffer[CurrentInstructionSlotIdx]; + } + + return llvm::ErrorSuccess(); +} + +MicroOpQueueStage::MicroOpQueueStage(unsigned Size, unsigned IPC, + bool ZeroLatencyStage) + : NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0), MaxIPC(IPC), + CurrentIPC(0), IsZeroLatencyStage(ZeroLatencyStage) { + Buffer.resize(Size ? Size : 1); + AvailableEntries = Buffer.size(); +} + +Error MicroOpQueueStage::execute(InstRef &IR) { + Buffer[NextAvailableSlotIdx] = IR; + unsigned NormalizedOpcodes = getNormalizedOpcodes(IR); + NextAvailableSlotIdx += NormalizedOpcodes; + NextAvailableSlotIdx %= Buffer.size(); + AvailableEntries -= NormalizedOpcodes; + ++CurrentIPC; + return llvm::ErrorSuccess(); +} + +Error MicroOpQueueStage::cycleStart() { + CurrentIPC = 0; + if (!IsZeroLatencyStage) + return moveInstructions(); + return llvm::ErrorSuccess(); +} + +Error MicroOpQueueStage::cycleEnd() { + if (IsZeroLatencyStage) + return moveInstructions(); + return llvm::ErrorSuccess(); +} + +} // namespace mca +} // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/MCA/Stages/RetireStage.cpp b/contrib/llvm-project/llvm/lib/MCA/Stages/RetireStage.cpp new file mode 100644 index 000000000000..e1789dd7fa2a --- /dev/null +++ b/contrib/llvm-project/llvm/lib/MCA/Stages/RetireStage.cpp @@ -0,0 +1,61 @@ +//===---------------------- RetireStage.cpp ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines the retire stage of an instruction pipeline. +/// The RetireStage represents the process logic that interacts with the +/// simulated RetireControlUnit hardware. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/Stages/RetireStage.h" +#include "llvm/MCA/HWEventListener.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "llvm-mca" + +namespace llvm { +namespace mca { + +llvm::Error RetireStage::cycleStart() { + if (RCU.isEmpty()) + return llvm::ErrorSuccess(); + + const unsigned MaxRetirePerCycle = RCU.getMaxRetirePerCycle(); + unsigned NumRetired = 0; + while (!RCU.isEmpty()) { + if (MaxRetirePerCycle != 0 && NumRetired == MaxRetirePerCycle) + break; + const RetireControlUnit::RUToken &Current = RCU.peekCurrentToken(); + if (!Current.Executed) + break; + RCU.consumeCurrentToken(); + notifyInstructionRetired(Current.IR); + NumRetired++; + } + + return llvm::ErrorSuccess(); +} + +llvm::Error RetireStage::execute(InstRef &IR) { + RCU.onInstructionExecuted(IR.getInstruction()->getRCUTokenID()); + return llvm::ErrorSuccess(); +} + +void RetireStage::notifyInstructionRetired(const InstRef &IR) const { + LLVM_DEBUG(llvm::dbgs() << "[E] Instruction Retired: #" << IR << '\n'); + llvm::SmallVector<unsigned, 4> FreedRegs(PRF.getNumRegisterFiles()); + const Instruction &Inst = *IR.getInstruction(); + + for (const WriteState &WS : Inst.getDefs()) + PRF.removeRegisterWrite(WS, FreedRegs); + notifyEvent<HWInstructionEvent>(HWInstructionRetiredEvent(IR, FreedRegs)); +} + +} // namespace mca +} // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/MCA/Stages/Stage.cpp b/contrib/llvm-project/llvm/lib/MCA/Stages/Stage.cpp new file mode 100644 index 000000000000..ed512ac9711c --- /dev/null +++ b/contrib/llvm-project/llvm/lib/MCA/Stages/Stage.cpp @@ -0,0 +1,28 @@ +//===---------------------- Stage.cpp ---------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines a stage. +/// A chain of stages compose an instruction pipeline. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/Stages/Stage.h" + +namespace llvm { +namespace mca { + +// Pin the vtable here in the implementation file. +Stage::~Stage() = default; + +void Stage::addListener(HWEventListener *Listener) { + Listeners.insert(Listener); +} + +} // namespace mca +} // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/MCA/Support.cpp b/contrib/llvm-project/llvm/lib/MCA/Support.cpp new file mode 100644 index 000000000000..ce1f0f6f211b --- /dev/null +++ b/contrib/llvm-project/llvm/lib/MCA/Support.cpp @@ -0,0 +1,110 @@ +//===--------------------- Support.cpp --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements a few helper functions used by various pipeline +/// components. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/Support.h" +#include "llvm/MC/MCSchedule.h" + +namespace llvm { +namespace mca { + +#define DEBUG_TYPE "llvm-mca" + +ResourceCycles &ResourceCycles::operator+=(const ResourceCycles &RHS) { + if (Denominator == RHS.Denominator) + Numerator += RHS.Numerator; + else { + // Create a common denominator for LHS and RHS by calculating the least + // common multiple from the GCD. + unsigned GCD = GreatestCommonDivisor64(Denominator, RHS.Denominator); + unsigned LCM = (Denominator * RHS.Denominator) / GCD; + unsigned LHSNumerator = Numerator * (LCM / Denominator); + unsigned RHSNumerator = RHS.Numerator * (LCM / RHS.Denominator); + Numerator = LHSNumerator + RHSNumerator; + Denominator = LCM; + } + return *this; +} + +void computeProcResourceMasks(const MCSchedModel &SM, + MutableArrayRef<uint64_t> Masks) { + unsigned ProcResourceID = 0; + + assert(Masks.size() == SM.getNumProcResourceKinds() && + "Invalid number of elements"); + // Resource at index 0 is the 'InvalidUnit'. Set an invalid mask for it. + Masks[0] = 0; + + // Create a unique bitmask for every processor resource unit. + for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) { + const MCProcResourceDesc &Desc = *SM.getProcResource(I); + if (Desc.SubUnitsIdxBegin) + continue; + Masks[I] = 1ULL << ProcResourceID; + ProcResourceID++; + } + + // Create a unique bitmask for every processor resource group. + for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) { + const MCProcResourceDesc &Desc = *SM.getProcResource(I); + if (!Desc.SubUnitsIdxBegin) + continue; + Masks[I] = 1ULL << ProcResourceID; + for (unsigned U = 0; U < Desc.NumUnits; ++U) { + uint64_t OtherMask = Masks[Desc.SubUnitsIdxBegin[U]]; + Masks[I] |= OtherMask; + } + ProcResourceID++; + } + +#ifndef NDEBUG + LLVM_DEBUG(dbgs() << "\nProcessor resource masks:" + << "\n"); + for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) { + const MCProcResourceDesc &Desc = *SM.getProcResource(I); + LLVM_DEBUG(dbgs() << '[' << format_decimal(I,2) << "] " << " - " + << format_hex(Masks[I],16) << " - " + << Desc.Name << '\n'); + } +#endif +} + +double computeBlockRThroughput(const MCSchedModel &SM, unsigned DispatchWidth, + unsigned NumMicroOps, + ArrayRef<unsigned> ProcResourceUsage) { + // The block throughput is bounded from above by the hardware dispatch + // throughput. That is because the DispatchWidth is an upper bound on the + // number of opcodes that can be part of a single dispatch group. + double Max = static_cast<double>(NumMicroOps) / DispatchWidth; + + // The block throughput is also limited by the amount of hardware parallelism. + // The number of available resource units affects the resource pressure + // distribution, as well as how many blocks can be executed every cycle. + for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) { + unsigned ResourceCycles = ProcResourceUsage[I]; + if (!ResourceCycles) + continue; + + const MCProcResourceDesc &MCDesc = *SM.getProcResource(I); + double Throughput = static_cast<double>(ResourceCycles) / MCDesc.NumUnits; + Max = std::max(Max, Throughput); + } + + // The block reciprocal throughput is computed as the MAX of: + // - (NumMicroOps / DispatchWidth) + // - (NumUnits / ResourceCycles) for every consumed processor resource. + return Max; +} + +} // namespace mca +} // namespace llvm |