diff options
Diffstat (limited to 'lib/CodeGen')
239 files changed, 16327 insertions, 10945 deletions
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 5abf50e5bd10..ffcb9a09ad73 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -1,4 +1,4 @@ -//===----- AggressiveAntiDepBreaker.cpp - Anti-dep breaker ----------------===// +//===- AggressiveAntiDepBreaker.cpp - Anti-dep breaker --------------------===// // // The LLVM Compiler Infrastructure // @@ -15,16 +15,33 @@ //===----------------------------------------------------------------------===// #include "AggressiveAntiDepBreaker.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include <cassert> +#include <map> +#include <set> +#include <utility> +#include <vector> + using namespace llvm; #define DEBUG_TYPE "post-RA-sched" @@ -34,18 +51,17 @@ static cl::opt<int> DebugDiv("agg-antidep-debugdiv", cl::desc("Debug control for aggressive anti-dep breaker"), cl::init(0), cl::Hidden); + static cl::opt<int> DebugMod("agg-antidep-debugmod", cl::desc("Debug control for aggressive anti-dep breaker"), cl::init(0), 
cl::Hidden); AggressiveAntiDepState::AggressiveAntiDepState(const unsigned TargetRegs, - MachineBasicBlock *BB) : - NumTargetRegs(TargetRegs), GroupNodes(TargetRegs, 0), - GroupNodeIndices(TargetRegs, 0), - KillIndices(TargetRegs, 0), - DefIndices(TargetRegs, 0) -{ + MachineBasicBlock *BB) + : NumTargetRegs(TargetRegs), GroupNodes(TargetRegs, 0), + GroupNodeIndices(TargetRegs, 0), KillIndices(TargetRegs, 0), + DefIndices(TargetRegs, 0) { const unsigned BBSize = BB->size(); for (unsigned i = 0; i < NumTargetRegs; ++i) { // Initialize all registers to be in their own group. Initially we @@ -76,8 +92,7 @@ void AggressiveAntiDepState::GetGroupRegs( } } -unsigned AggressiveAntiDepState::UnionGroups(unsigned Reg1, unsigned Reg2) -{ +unsigned AggressiveAntiDepState::UnionGroups(unsigned Reg1, unsigned Reg2) { assert(GroupNodes[0] == 0 && "GroupNode 0 not parent!"); assert(GroupNodeIndices[0] == 0 && "Reg 0 not in Group 0!"); @@ -92,8 +107,7 @@ unsigned AggressiveAntiDepState::UnionGroups(unsigned Reg1, unsigned Reg2) return Parent; } -unsigned AggressiveAntiDepState::LeaveGroup(unsigned Reg) -{ +unsigned AggressiveAntiDepState::LeaveGroup(unsigned Reg) { // Create a new GroupNode for Reg. Reg's existing GroupNode must // stay as is because there could be other GroupNodes referring to // it. @@ -103,8 +117,7 @@ unsigned AggressiveAntiDepState::LeaveGroup(unsigned Reg) return idx; } -bool AggressiveAntiDepState::IsLive(unsigned Reg) -{ +bool AggressiveAntiDepState::IsLive(unsigned Reg) { // KillIndex must be defined and DefIndex not defined for a register // to be live. 
return((KillIndices[Reg] != ~0u) && (DefIndices[Reg] == ~0u)); @@ -115,8 +128,7 @@ AggressiveAntiDepBreaker::AggressiveAntiDepBreaker( TargetSubtargetInfo::RegClassVector &CriticalPathRCs) : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()), TII(MF.getSubtarget().getInstrInfo()), - TRI(MF.getSubtarget().getRegisterInfo()), RegClassInfo(RCI), - State(nullptr) { + TRI(MF.getSubtarget().getRegisterInfo()), RegClassInfo(RCI) { /* Collect a bitset of all registers that are only broken if they are on the critical path. */ for (unsigned i = 0, e = CriticalPathRCs.size(); i < e; ++i) { @@ -129,7 +141,7 @@ AggressiveAntiDepBreaker::AggressiveAntiDepBreaker( DEBUG(dbgs() << "AntiDep Critical-Path Registers:"); DEBUG(for (unsigned r : CriticalPathSet.set_bits()) - dbgs() << " " << TRI->getName(r)); + dbgs() << " " << printReg(r, TRI)); DEBUG(dbgs() << '\n'); } @@ -204,7 +216,7 @@ void AggressiveAntiDepBreaker::Observe(MachineInstr &MI, unsigned Count, // schedule region). if (State->IsLive(Reg)) { DEBUG(if (State->GetGroup(Reg) != 0) - dbgs() << " " << TRI->getName(Reg) << "=g" << + dbgs() << " " << printReg(Reg, TRI) << "=g" << State->GetGroup(Reg) << "->g0(region live-out)"); State->UnionGroups(Reg, 0); } else if ((DefIndices[Reg] < InsertPosIndex) @@ -250,7 +262,7 @@ void AggressiveAntiDepBreaker::GetPassthruRegs( /// AntiDepEdges - Return in Edges the anti- and output- dependencies /// in SU that we want to consider for breaking. 
-static void AntiDepEdges(const SUnit *SU, std::vector<const SDep*>& Edges) { +static void AntiDepEdges(const SUnit *SU, std::vector<const SDep *> &Edges) { SmallSet<unsigned, 4> RegSet; for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); P != PE; ++P) { @@ -311,7 +323,7 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx, RegRefs.erase(Reg); State->LeaveGroup(Reg); DEBUG(if (header) { - dbgs() << header << TRI->getName(Reg); header = nullptr; }); + dbgs() << header << printReg(Reg, TRI); header = nullptr; }); DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << tag); // Repeat for subregisters. Note that we only do this if the superregister // was not live because otherwise, regardless whether we have an explicit @@ -325,8 +337,8 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx, RegRefs.erase(SubregReg); State->LeaveGroup(SubregReg); DEBUG(if (header) { - dbgs() << header << TRI->getName(Reg); header = nullptr; }); - DEBUG(dbgs() << " " << TRI->getName(SubregReg) << "->g" << + dbgs() << header << printReg(Reg, TRI); header = nullptr; }); + DEBUG(dbgs() << " " << printReg(SubregReg, TRI) << "->g" << State->GetGroup(SubregReg) << tag); } } @@ -362,7 +374,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction( unsigned Reg = MO.getReg(); if (Reg == 0) continue; - DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" << State->GetGroup(Reg)); + DEBUG(dbgs() << " " << printReg(Reg, TRI) << "=g" << State->GetGroup(Reg)); // If MI's defs have a special allocation requirement, don't allow // any def registers to be changed. 
Also assume all registers @@ -381,8 +393,8 @@ void AggressiveAntiDepBreaker::PrescanInstruction( unsigned AliasReg = *AI; if (State->IsLive(AliasReg)) { State->UnionGroups(Reg, AliasReg); - DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via " << - TRI->getName(AliasReg) << ")"); + DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via " + << printReg(AliasReg, TRI) << ")"); } } @@ -436,11 +448,11 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI, // FIXME: The issue with predicated instruction is more complex. We are being // conservatively here because the kill markers cannot be trusted after // if-conversion: - // %R6<def> = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14] + // %r6 = LDR %sp, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14] // ... - // STR %R0, %R6<kill>, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395] - // %R6<def> = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12] - // STR %R0, %R6<kill>, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8) + // STR %r0, killed %r6, %reg0, 0, pred:0, pred:%cpsr; mem:ST4[%395] + // %r6 = LDR %sp, %reg0, 100, pred:0, pred:%cpsr; mem:LD4[FixedStack12] + // STR %r0, killed %r6, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8) // // The first R6 kill is not really a kill since it's killed by a predicated // instruction which may not be executed. The second R6 def may or may not @@ -457,8 +469,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Reg = MO.getReg(); if (Reg == 0) continue; - DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" << - State->GetGroup(Reg)); + DEBUG(dbgs() << " " << printReg(Reg, TRI) << "=g" << State->GetGroup(Reg)); // It wasn't previously live but now it is, this is a kill. 
Forget // the previous live-range information and start a new live-range @@ -493,10 +504,10 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI, if (Reg == 0) continue; if (FirstReg != 0) { - DEBUG(dbgs() << "=" << TRI->getName(Reg)); + DEBUG(dbgs() << "=" << printReg(Reg, TRI)); State->UnionGroups(FirstReg, Reg); } else { - DEBUG(dbgs() << " " << TRI->getName(Reg)); + DEBUG(dbgs() << " " << printReg(Reg, TRI)); FirstReg = Reg; } } @@ -544,8 +555,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( // break the anti-dependence. std::vector<unsigned> Regs; State->GetGroupRegs(AntiDepGroupIndex, Regs, &RegRefs); - assert(Regs.size() > 0 && "Empty register group!"); - if (Regs.size() == 0) + assert(!Regs.empty() && "Empty register group!"); + if (Regs.empty()) return false; // Find the "superest" register in the group. At the same time, @@ -562,7 +573,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( // If Reg has any references, then collect possible rename regs if (RegRefs.count(Reg) > 0) { - DEBUG(dbgs() << "\t\t" << TRI->getName(Reg) << ":"); + DEBUG(dbgs() << "\t\t" << printReg(Reg, TRI) << ":"); BitVector &BV = RenameRegisterMap[Reg]; assert(BV.empty()); @@ -571,7 +582,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( DEBUG({ dbgs() << " ::"; for (unsigned r : BV.set_bits()) - dbgs() << " " << TRI->getName(r); + dbgs() << " " << printReg(r, TRI); dbgs() << "\n"; }); } @@ -596,8 +607,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( if (renamecnt++ % DebugDiv != DebugMod) return false; - dbgs() << "*** Performing rename " << TRI->getName(SuperReg) << - " for debug ***\n"; + dbgs() << "*** Performing rename " << printReg(SuperReg, TRI) + << " for debug ***\n"; } #endif @@ -634,7 +645,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( // Don't replace a register with itself. 
if (NewSuperReg == SuperReg) continue; - DEBUG(dbgs() << " [" << TRI->getName(NewSuperReg) << ':'); + DEBUG(dbgs() << " [" << printReg(NewSuperReg, TRI) << ':'); RenameMap.clear(); // For each referenced group register (which must be a SuperReg or @@ -651,7 +662,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( NewReg = TRI->getSubReg(NewSuperReg, NewSubRegIdx); } - DEBUG(dbgs() << " " << TRI->getName(NewReg)); + DEBUG(dbgs() << " " << printReg(NewReg, TRI)); // Check if Reg can be renamed to NewReg. if (!RenameRegisterMap[Reg].test(NewReg)) { @@ -672,7 +683,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( unsigned AliasReg = *AI; if (State->IsLive(AliasReg) || (KillIndices[Reg] > DefIndices[AliasReg])) { - DEBUG(dbgs() << "(alias " << TRI->getName(AliasReg) << " live)"); + DEBUG(dbgs() << "(alias " << printReg(AliasReg, TRI) << " live)"); found = true; break; } @@ -732,14 +743,12 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( /// BreakAntiDependencies - Identifiy anti-dependencies within the /// ScheduleDAG and break them by renaming registers. 
-/// unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( - const std::vector<SUnit>& SUnits, + const std::vector<SUnit> &SUnits, MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned InsertPosIndex, DbgValueVector &DbgValues) { - std::vector<unsigned> &KillIndices = State->GetKillIndices(); std::vector<unsigned> &DefIndices = State->GetDefIndices(); std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& @@ -783,7 +792,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( DEBUG(dbgs() << "Available regs:"); for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) { if (!State->IsLive(Reg)) - DEBUG(dbgs() << " " << TRI->getName(Reg)); + DEBUG(dbgs() << " " << printReg(Reg, TRI)); } DEBUG(dbgs() << '\n'); #endif @@ -839,7 +848,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( (Edge->getKind() != SDep::Output)) continue; unsigned AntiDepReg = Edge->getReg(); - DEBUG(dbgs() << "\tAntidep reg: " << TRI->getName(AntiDepReg)); + DEBUG(dbgs() << "\tAntidep reg: " << printReg(AntiDepReg, TRI)); assert(AntiDepReg != 0 && "Anti-dependence on reg0?"); if (!MRI.isAllocatable(AntiDepReg)) { @@ -942,7 +951,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( std::map<unsigned, unsigned> RenameMap; if (FindSuitableFreeRegisters(GroupIndex, RenameOrder, RenameMap)) { DEBUG(dbgs() << "\tBreaking anti-dependence edge on " - << TRI->getName(AntiDepReg) << ":"); + << printReg(AntiDepReg, TRI) << ":"); // Handle each group register... 
for (std::map<unsigned, unsigned>::iterator @@ -950,9 +959,9 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( unsigned CurrReg = S->first; unsigned NewReg = S->second; - DEBUG(dbgs() << " " << TRI->getName(CurrReg) << "->" << - TRI->getName(NewReg) << "(" << - RegRefs.count(CurrReg) << " refs)"); + DEBUG(dbgs() << " " << printReg(CurrReg, TRI) << "->" + << printReg(NewReg, TRI) << "(" + << RegRefs.count(CurrReg) << " refs)"); // Update the references to the old register CurrReg to // refer to the new register NewReg. diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h index f97e6666b219..5dce3c2499e5 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.h +++ b/lib/CodeGen/AggressiveAntiDepBreaker.h @@ -1,4 +1,4 @@ -//=- llvm/CodeGen/AggressiveAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-=// +//==- llvm/CodeGen/AggressiveAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -19,29 +19,35 @@ #include "AntiDepBreaker.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Support/Compiler.h" #include <map> +#include <set> +#include <vector> namespace llvm { + +class MachineBasicBlock; +class MachineFunction; +class MachineInstr; +class MachineOperand; +class MachineRegisterInfo; class RegisterClassInfo; +class TargetInstrInfo; +class TargetRegisterClass; +class TargetRegisterInfo; /// Contains all the state necessary for anti-dep breaking. 
class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState { public: /// Information about a register reference within a liverange - typedef struct { + struct RegisterReference { /// The registers operand MachineOperand *Operand; + /// The register class const TargetRegisterClass *RC; - } RegisterReference; + }; private: /// Number of non-virtual target registers (i.e. TRI->getNumRegs()). @@ -110,7 +116,7 @@ class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState { class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepBreaker : public AntiDepBreaker { - MachineFunction& MF; + MachineFunction &MF; MachineRegisterInfo &MRI; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; @@ -121,10 +127,10 @@ class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState { BitVector CriticalPathSet; /// The state used to identify and rename anti-dependence registers. - AggressiveAntiDepState *State; + AggressiveAntiDepState *State = nullptr; public: - AggressiveAntiDepBreaker(MachineFunction& MFi, + AggressiveAntiDepBreaker(MachineFunction &MFi, const RegisterClassInfo &RCI, TargetSubtargetInfo::RegClassVector& CriticalPathRCs); ~AggressiveAntiDepBreaker() override; @@ -134,8 +140,7 @@ class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState { /// Identifiy anti-dependencies along the critical path /// of the ScheduleDAG and break them by renaming registers. - /// - unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits, + unsigned BreakAntiDependencies(const std::vector<SUnit> &SUnits, MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned InsertPosIndex, @@ -143,7 +148,6 @@ class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState { /// Update liveness information to account for the current /// instruction, which will not be scheduled. - /// void Observe(MachineInstr &MI, unsigned Count, unsigned InsertPosIndex) override; @@ -152,7 +156,7 @@ class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState { private: /// Keep track of a position in the allocation order for each regclass. 
- typedef std::map<const TargetRegisterClass *, unsigned> RenameOrderType; + using RenameOrderType = std::map<const TargetRegisterClass *, unsigned>; /// Return true if MO represents a register /// that is both implicitly used and defined in MI @@ -174,6 +178,7 @@ class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState { RenameOrderType& RenameOrder, std::map<unsigned, unsigned> &RenameMap); }; -} -#endif +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp index d840a2f69ab3..8e8c1d8e08d1 100644 --- a/lib/CodeGen/AllocationOrder.cpp +++ b/lib/CodeGen/AllocationOrder.cpp @@ -31,18 +31,19 @@ AllocationOrder::AllocationOrder(unsigned VirtReg, const VirtRegMap &VRM, const RegisterClassInfo &RegClassInfo, const LiveRegMatrix *Matrix) - : Pos(0) { + : Pos(0), HardHints(false) { const MachineFunction &MF = VRM.getMachineFunction(); const TargetRegisterInfo *TRI = &VRM.getTargetRegInfo(); Order = RegClassInfo.getOrder(MF.getRegInfo().getRegClass(VirtReg)); - TRI->getRegAllocationHints(VirtReg, Order, Hints, MF, &VRM, Matrix); + if (TRI->getRegAllocationHints(VirtReg, Order, Hints, MF, &VRM, Matrix)) + HardHints = true; rewind(); DEBUG({ if (!Hints.empty()) { dbgs() << "hints:"; for (unsigned I = 0, E = Hints.size(); I != E; ++I) - dbgs() << ' ' << PrintReg(Hints[I], TRI); + dbgs() << ' ' << printReg(Hints[I], TRI); dbgs() << '\n'; } }); diff --git a/lib/CodeGen/AllocationOrder.h b/lib/CodeGen/AllocationOrder.h index 8223a52e333b..467bcc2edc6f 100644 --- a/lib/CodeGen/AllocationOrder.h +++ b/lib/CodeGen/AllocationOrder.h @@ -32,7 +32,11 @@ class LLVM_LIBRARY_VISIBILITY AllocationOrder { ArrayRef<MCPhysReg> Order; int Pos; + // If HardHints is true, *only* Hints will be returned. + bool HardHints; + public: + /// Create a new AllocationOrder for VirtReg. /// @param VirtReg Virtual register to allocate for. /// @param VRM Virtual register map for function. 
@@ -51,6 +55,8 @@ public: unsigned next(unsigned Limit = 0) { if (Pos < 0) return Hints.end()[Pos++]; + if (HardHints) + return 0; if (!Limit) Limit = Order.size(); while (Pos < int(Limit)) { @@ -68,6 +74,8 @@ public: unsigned nextWithDups(unsigned Limit) { if (Pos < 0) return Hints.end()[Pos++]; + if (HardHints) + return 0; if (Pos < int(Limit)) return Order[Pos++]; return 0; diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index c2aecc651b79..0731ae575437 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -14,7 +14,9 @@ #include "llvm/CodeGen/Analysis.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" @@ -24,9 +26,6 @@ #include "llvm/IR/Module.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Utils/GlobalStatus.h" using namespace llvm; @@ -565,6 +564,24 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F, return false; const Value *RetVal = Ret->getOperand(0), *CallVal = I; + // Intrinsic like llvm.memcpy has no return value, but the expanded + // libcall may or may not have return value. On most platforms, it + // will be expanded as memcpy in libc, which returns the first + // argument. On other platforms like arm-none-eabi, memcpy may be + // expanded as library call without return value, like __aeabi_memcpy. 
+ const CallInst *Call = cast<CallInst>(I); + if (Function *F = Call->getCalledFunction()) { + Intrinsic::ID IID = F->getIntrinsicID(); + if (((IID == Intrinsic::memcpy && + TLI.getLibcallName(RTLIB::MEMCPY) == StringRef("memcpy")) || + (IID == Intrinsic::memmove && + TLI.getLibcallName(RTLIB::MEMMOVE) == StringRef("memmove")) || + (IID == Intrinsic::memset && + TLI.getLibcallName(RTLIB::MEMSET) == StringRef("memset"))) && + RetVal == Call->getArgOperand(0)) + return true; + } + SmallVector<unsigned, 4> RetPath, CallPath; SmallVector<CompositeType *, 4> RetSubTypes, CallSubTypes; @@ -651,7 +668,7 @@ llvm::getFuncletMembership(const MachineFunction &MF) { int EntryBBNumber = MF.front().getNumber(); bool IsSEH = isAsynchronousEHPersonality( - classifyEHPersonality(MF.getFunction()->getPersonalityFn())); + classifyEHPersonality(MF.getFunction().getPersonalityFn())); const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); SmallVector<const MachineBasicBlock *, 16> FuncletBlocks; diff --git a/lib/CodeGen/AntiDepBreaker.h b/lib/CodeGen/AntiDepBreaker.h index d14d93100adb..181da83dc88b 100644 --- a/lib/CodeGen/AntiDepBreaker.h +++ b/lib/CodeGen/AntiDepBreaker.h @@ -1,4 +1,4 @@ -//=- llvm/CodeGen/AntiDepBreaker.h - Anti-Dependence Breaking -*- C++ -*-=// +//===- llvm/CodeGen/AntiDepBreaker.h - Anti-Dependence Breaking -*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -15,12 +15,14 @@ #ifndef LLVM_LIB_CODEGEN_ANTIDEPBREAKER_H #define LLVM_LIB_CODEGEN_ANTIDEPBREAKER_H +#include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Compiler.h" +#include <cassert> +#include <utility> #include <vector> namespace llvm { @@ 
-29,17 +31,17 @@ namespace llvm { /// registers to break register anti-dependencies (WAR hazards). class LLVM_LIBRARY_VISIBILITY AntiDepBreaker { public: - typedef std::vector<std::pair<MachineInstr *, MachineInstr *> > - DbgValueVector; + using DbgValueVector = + std::vector<std::pair<MachineInstr *, MachineInstr *>>; virtual ~AntiDepBreaker(); /// Initialize anti-dep breaking for a new basic block. - virtual void StartBlock(MachineBasicBlock *BB) =0; + virtual void StartBlock(MachineBasicBlock *BB) = 0; /// Identifiy anti-dependencies within a basic-block region and break them by /// renaming registers. Return the number of anti-dependencies broken. - virtual unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits, + virtual unsigned BreakAntiDependencies(const std::vector<SUnit> &SUnits, MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned InsertPosIndex, @@ -51,7 +53,7 @@ public: unsigned InsertPosIndex) = 0; /// Finish anti-dep breaking for a basic block. - virtual void FinishBlock() =0; + virtual void FinishBlock() = 0; /// Update DBG_VALUE if dependency breaker is updating /// other machine instruction to use NewReg. 
@@ -81,6 +83,6 @@ public: } }; -} +} // end namespace llvm -#endif +#endif // LLVM_LIB_CODEGEN_ANTIDEPBREAKER_H diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp index 8b1376ab363d..15cfbd5c40ff 100644 --- a/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -12,26 +12,19 @@ //===----------------------------------------------------------------------===// #include "DwarfException.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" -#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; ARMException::ARMException(AsmPrinter *A) : DwarfCFIExceptionBase(A) {} @@ -67,16 +60,16 @@ void ARMException::beginFunction(const MachineFunction *MF) { /// void ARMException::endFunction(const MachineFunction *MF) { ARMTargetStreamer &ATS = getTargetStreamer(); - const Function *F = MF->getFunction(); + const Function &F = MF->getFunction(); const Function *Per = nullptr; - if (F->hasPersonalityFn()) - Per = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts()); + if (F.hasPersonalityFn()) + Per = dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts()); bool forceEmitPersonality = - F->hasPersonalityFn() && !isNoOpWithoutInvoke(classifyEHPersonality(Per)) && - F->needsUnwindTableEntry(); + F.hasPersonalityFn() && !isNoOpWithoutInvoke(classifyEHPersonality(Per)) && + 
F.needsUnwindTableEntry(); bool shouldEmitPersonality = forceEmitPersonality || !MF->getLandingPads().empty(); - if (!Asm->MF->getFunction()->needsUnwindTableEntry() && + if (!Asm->MF->getFunction().needsUnwindTableEntry() && !shouldEmitPersonality) ATS.emitCantUnwind(); else if (shouldEmitPersonality) { diff --git a/lib/CodeGen/AsmPrinter/AddressPool.cpp b/lib/CodeGen/AsmPrinter/AddressPool.cpp index ec552e0640e9..59ed0324bdb0 100644 --- a/lib/CodeGen/AsmPrinter/AddressPool.cpp +++ b/lib/CodeGen/AsmPrinter/AddressPool.cpp @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/AddressPool.cpp - Dwarf Debug Framework ---*- C++ -*--===// +//===- llvm/CodeGen/AddressPool.cpp - Dwarf Debug Framework ---------------===// // // The LLVM Compiler Infrastructure // @@ -8,9 +8,12 @@ //===----------------------------------------------------------------------===// #include "AddressPool.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/TargetLoweringObjectFile.h" +#include "llvm/IR/DataLayout.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/Target/TargetLoweringObjectFile.h" +#include <utility> using namespace llvm; diff --git a/lib/CodeGen/AsmPrinter/AddressPool.h b/lib/CodeGen/AsmPrinter/AddressPool.h index ba3e3b7c315d..990a158d87cd 100644 --- a/lib/CodeGen/AsmPrinter/AddressPool.h +++ b/lib/CodeGen/AsmPrinter/AddressPool.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/AddressPool.h - Dwarf Debug Framework -----*- C++ -*--===// +//===- llvm/CodeGen/AddressPool.h - Dwarf Debug Framework -------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -11,11 +11,13 @@ #define LLVM_LIB_CODEGEN_ASMPRINTER_ADDRESSPOOL_H #include "llvm/ADT/DenseMap.h" -#include "llvm/MC/MCSymbol.h" namespace llvm { -class MCSection; + class AsmPrinter; +class MCSection; +class MCSymbol; + // Collection of addresses for this unit and assorted labels. // A Symbol->unsigned mapping of addresses used by indirect // references. 
@@ -23,6 +25,7 @@ class AddressPool { struct AddressPoolEntry { unsigned Number; bool TLS; + AddressPoolEntry(unsigned Number, bool TLS) : Number(Number), TLS(TLS) {} }; DenseMap<const MCSymbol *, AddressPoolEntry> Pool; @@ -31,10 +34,10 @@ class AddressPool { /// the last "resetUsedFlag" call. Used to implement type unit fallback - a /// type that references addresses cannot be placed in a type unit when using /// fission. - bool HasBeenUsed; + bool HasBeenUsed = false; public: - AddressPool() : HasBeenUsed(false) {} + AddressPool() = default; /// \brief Returns the index into the address pool with the given /// label/symbol. @@ -48,5 +51,7 @@ public: void resetUsedFlag() { HasBeenUsed = false; } }; -} -#endif + +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_ASMPRINTER_ADDRESSPOOL_H diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index ff427c9a0d75..31037095aa2b 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1,4 +1,4 @@ -//===-- AsmPrinter.cpp - Common AsmPrinter code ---------------------------===// +//===- AsmPrinter.cpp - Common AsmPrinter code ----------------------------===// // // The LLVM Compiler Infrastructure // @@ -29,10 +29,11 @@ #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/ObjectUtils.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/BinaryFormat/ELF.h" -#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/CodeGen/GCStrategy.h" @@ -46,10 +47,19 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include 
"llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Comdat.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -62,14 +72,18 @@ #include "llvm/IR/GlobalObject.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCCodePadder.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDirectives.h" +#include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSection.h" @@ -78,29 +92,28 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/SectionKind.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/Path.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" 
-#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Target/TargetOptions.h" #include <algorithm> #include <cassert> #include <cinttypes> #include <cstdint> +#include <iterator> #include <limits> #include <memory> #include <string> @@ -129,7 +142,8 @@ static cl::opt<bool> char AsmPrinter::ID = 0; -typedef DenseMap<GCStrategy*, std::unique_ptr<GCMetadataPrinter>> gcp_map_type; +using gcp_map_type = DenseMap<GCStrategy *, std::unique_ptr<GCMetadataPrinter>>; + static gcp_map_type &getGCMap(void *&P) { if (!P) P = new gcp_map_type(); @@ -184,7 +198,6 @@ bool AsmPrinter::isPositionIndependent() const { } /// getFunctionNumber - Return a unique ID for the current function. -/// unsigned AsmPrinter::getFunctionNumber() const { return MF->getFunctionNumber(); } @@ -221,8 +234,7 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<MachineModuleInfo>(); AU.addRequired<MachineOptimizationRemarkEmitterPass>(); AU.addRequired<GCModuleInfo>(); - if (isVerbose()) - AU.addRequired<MachineLoopInfo>(); + AU.addRequired<MachineLoopInfo>(); } bool AsmPrinter::doInitialization(Module &M) { @@ -242,28 +254,8 @@ bool AsmPrinter::doInitialization(Module &M) { // alternative is duplicated code in each of the target asm printers that // use the directive, where it would need the same conditionalization // anyway. - const Triple &TT = TM.getTargetTriple(); - // If there is a version specified, Major will be non-zero. 
- if (TT.isOSDarwin() && TT.getOSMajorVersion() != 0) { - unsigned Major, Minor, Update; - MCVersionMinType VersionType; - if (TT.isWatchOS()) { - VersionType = MCVM_WatchOSVersionMin; - TT.getWatchOSVersion(Major, Minor, Update); - } else if (TT.isTvOS()) { - VersionType = MCVM_TvOSVersionMin; - TT.getiOSVersion(Major, Minor, Update); - } else if (TT.isMacOSX()) { - VersionType = MCVM_OSXVersionMin; - if (!TT.getMacOSXVersion(Major, Minor, Update)) - Major = 0; - } else { - VersionType = MCVM_IOSVersionMin; - TT.getiOSVersion(Major, Minor, Update); - } - if (Major != 0) - OutStreamer->EmitVersionMin(VersionType, Major, Minor, Update); - } + const Triple &Target = TM.getTargetTriple(); + OutStreamer->EmitVersionForTarget(Target); // Allow the target to emit any magic that it wants at the start of the file. EmitStartOfAsmFile(M); @@ -272,7 +264,8 @@ bool AsmPrinter::doInitialization(Module &M) { // don't, this at least helps the user find where a global came from. if (MAI->hasSingleParameterDotFile()) { // .file "foo.c" - OutStreamer->EmitFileDirective(M.getSourceFileName()); + OutStreamer->EmitFileDirective( + llvm::sys::path::filename(M.getSourceFileName())); } GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); @@ -628,35 +621,35 @@ void AsmPrinter::EmitDebugThreadLocal(const MCExpr *Value, /// EmitFunctionHeader - This method emits the header for the current /// function. void AsmPrinter::EmitFunctionHeader() { - const Function *F = MF->getFunction(); + const Function &F = MF->getFunction(); if (isVerbose()) OutStreamer->GetCommentOS() << "-- Begin function " - << GlobalValue::dropLLVMManglingEscape(F->getName()) << '\n'; + << GlobalValue::dropLLVMManglingEscape(F.getName()) << '\n'; // Print out constants referenced by the function EmitConstantPool(); // Print the 'header' of function. 
- OutStreamer->SwitchSection(getObjFileLowering().SectionForGlobal(F, TM)); - EmitVisibility(CurrentFnSym, F->getVisibility()); + OutStreamer->SwitchSection(getObjFileLowering().SectionForGlobal(&F, TM)); + EmitVisibility(CurrentFnSym, F.getVisibility()); - EmitLinkage(F, CurrentFnSym); + EmitLinkage(&F, CurrentFnSym); if (MAI->hasFunctionAlignment()) - EmitAlignment(MF->getAlignment(), F); + EmitAlignment(MF->getAlignment(), &F); if (MAI->hasDotTypeDotSizeDirective()) OutStreamer->EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction); if (isVerbose()) { - F->printAsOperand(OutStreamer->GetCommentOS(), - /*PrintType=*/false, F->getParent()); + F.printAsOperand(OutStreamer->GetCommentOS(), + /*PrintType=*/false, F.getParent()); OutStreamer->GetCommentOS() << '\n'; } // Emit the prefix data. - if (F->hasPrefixData()) { + if (F.hasPrefixData()) { if (MAI->hasSubsectionsViaSymbols()) { // Preserving prefix data on platforms which use subsections-via-symbols // is a bit tricky. Here we introduce a symbol for the prefix data @@ -665,12 +658,12 @@ void AsmPrinter::EmitFunctionHeader() { MCSymbol *PrefixSym = OutContext.createLinkerPrivateTempSymbol(); OutStreamer->EmitLabel(PrefixSym); - EmitGlobalConstant(F->getParent()->getDataLayout(), F->getPrefixData()); + EmitGlobalConstant(F.getParent()->getDataLayout(), F.getPrefixData()); // Emit an .alt_entry directive for the actual function symbol. OutStreamer->EmitSymbolAttribute(CurrentFnSym, MCSA_AltEntry); } else { - EmitGlobalConstant(F->getParent()->getDataLayout(), F->getPrefixData()); + EmitGlobalConstant(F.getParent()->getDataLayout(), F.getPrefixData()); } } @@ -682,7 +675,7 @@ void AsmPrinter::EmitFunctionHeader() { // references to the dangling symbols. Emit them at the start of the function // so that we don't get references to undefined symbols. 
std::vector<MCSymbol*> DeadBlockSyms; - MMI->takeDeletedSymbolsForFunction(F, DeadBlockSyms); + MMI->takeDeletedSymbolsForFunction(&F, DeadBlockSyms); for (unsigned i = 0, e = DeadBlockSyms.size(); i != e; ++i) { OutStreamer->AddComment("Address taken block that was later removed"); OutStreamer->EmitLabel(DeadBlockSyms[i]); @@ -707,8 +700,8 @@ void AsmPrinter::EmitFunctionHeader() { } // Emit the prologue data. - if (F->hasPrologueData()) - EmitGlobalConstant(F->getParent()->getDataLayout(), F->getPrologueData()); + if (F.hasPrologueData()) + EmitGlobalConstant(F.getParent()->getDataLayout(), F.getPrologueData()); } /// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the @@ -729,9 +722,11 @@ void AsmPrinter::EmitFunctionEntryLabel() { } /// emitComments - Pretty-print comments for instructions. -static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS, +/// It returns true iff the sched comment was emitted. +/// Otherwise it returns false. +static bool emitComments(const MachineInstr &MI, raw_ostream &CommentOS, AsmPrinter *AP) { - const MachineFunction *MF = MI.getParent()->getParent(); + const MachineFunction *MF = MI.getMF(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); // Check for spills and reloads @@ -773,12 +768,16 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS, CommentOS << " Reload Reuse"; } - if (Commented && AP->EnablePrintSchedInfo) - // If any comment was added above and we need sched info comment then - // add this new comment just after the above comment w/o "\n" between them. - CommentOS << " " << MF->getSubtarget().getSchedInfoStr(MI) << "\n"; - else if (Commented) + if (Commented) { + if (AP->EnablePrintSchedInfo) { + // If any comment was added above and we need sched info comment then add + // this new comment just after the above comment w/o "\n" between them. 
+ CommentOS << " " << MF->getSubtarget().getSchedInfoStr(MI) << "\n"; + return true; + } CommentOS << "\n"; + } + return false; } /// emitImplicitDef - This method emits the specified machine instruction @@ -789,7 +788,7 @@ void AsmPrinter::emitImplicitDef(const MachineInstr *MI) const { SmallString<128> Str; raw_svector_ostream OS(Str); OS << "implicit-def: " - << PrintReg(RegNo, MF->getSubtarget().getRegisterInfo()); + << printReg(RegNo, MF->getSubtarget().getRegisterInfo()); OutStreamer->AddComment(OS.str()); OutStreamer->AddBlankLine(); @@ -802,10 +801,8 @@ static void emitKill(const MachineInstr *MI, AsmPrinter &AP) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &Op = MI->getOperand(i); assert(Op.isReg() && "KILL instruction must have only register operands"); - OS << ' ' - << PrintReg(Op.getReg(), - AP.MF->getSubtarget().getRegisterInfo()) - << (Op.isDef() ? "<def>" : "<kill>"); + OS << ' ' << (Op.isDef() ? "def " : "killed ") + << printReg(Op.getReg(), AP.MF->getSubtarget().getRegisterInfo()); } AP.OutStreamer->AddComment(OS.str()); AP.OutStreamer->AddBlankLine(); @@ -890,7 +887,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { } if (MemLoc) OS << '['; - OS << PrintReg(Reg, AP.MF->getSubtarget().getRegisterInfo()); + OS << printReg(Reg, AP.MF->getSubtarget().getRegisterInfo()); } if (MemLoc) @@ -901,9 +898,9 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { return true; } -AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() { +AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() const { if (MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI && - MF->getFunction()->needsUnwindTableEntry()) + MF->getFunction().needsUnwindTableEntry()) return CFI_M_EH; if (MMI->hasDebugInfo()) @@ -913,7 +910,7 @@ AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() { } bool AsmPrinter::needsSEHMoves() { - return MAI->usesWindowsCFI() && 
MF->getFunction()->needsUnwindTableEntry(); + return MAI->usesWindowsCFI() && MF->getFunction().needsUnwindTableEntry(); } void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) { @@ -951,6 +948,31 @@ void AsmPrinter::emitFrameAlloc(const MachineInstr &MI) { MCConstantExpr::create(FrameOffset, OutContext)); } +void AsmPrinter::emitStackSizeSection(const MachineFunction &MF) { + if (!MF.getTarget().Options.EmitStackSizeSection) + return; + + MCSection *StackSizeSection = getObjFileLowering().getStackSizesSection(); + if (!StackSizeSection) + return; + + const MachineFrameInfo &FrameInfo = MF.getFrameInfo(); + // Don't emit functions with dynamic stack allocations. + if (FrameInfo.hasVarSizedObjects()) + return; + + OutStreamer->PushSection(); + OutStreamer->SwitchSection(StackSizeSection); + + const MCSymbol *FunctionSymbol = getSymbol(&MF.getFunction()); + uint64_t StackSize = FrameInfo.getStackSize(); + OutStreamer->EmitValue(MCSymbolRefExpr::create(FunctionSymbol, OutContext), + /* size = */ 8); + OutStreamer->EmitULEB128IntValue(StackSize); + + OutStreamer->PopSection(); +} + static bool needFuncLabelsForEHOrDebugInfo(const MachineFunction &MF, MachineModuleInfo *MMI) { if (!MF.getLandingPads().empty() || MF.hasEHFunclets() || MMI->hasDebugInfo()) @@ -958,10 +980,10 @@ static bool needFuncLabelsForEHOrDebugInfo(const MachineFunction &MF, // We might emit an EH table that uses function begin and end labels even if // we don't have any landingpads. - if (!MF.getFunction()->hasPersonalityFn()) + if (!MF.getFunction().hasPersonalityFn()) return false; return !isNoOpWithoutInvoke( - classifyEHPersonality(MF.getFunction()->getPersonalityFn())); + classifyEHPersonality(MF.getFunction().getPersonalityFn())); } /// EmitFunctionBody - This method emits the body and trailer for a @@ -981,7 +1003,6 @@ void AsmPrinter::EmitFunctionBody() { // Print a label for the basic block. EmitBasicBlockStart(MBB); for (auto &MI : MBB) { - // Print the assembly for the instruction. 
if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() && !MI.isDebugValue()) { @@ -998,18 +1019,18 @@ void AsmPrinter::EmitFunctionBody() { } } - if (isVerbose()) - emitComments(MI, OutStreamer->GetCommentOS(), this); + if (isVerbose() && emitComments(MI, OutStreamer->GetCommentOS(), this)) { + MachineInstr *MIP = const_cast<MachineInstr *>(&MI); + MIP->setAsmPrinterFlag(MachineInstr::NoSchedComment); + } switch (MI.getOpcode()) { case TargetOpcode::CFI_INSTRUCTION: emitCFIInstruction(MI); break; - case TargetOpcode::LOCAL_ESCAPE: emitFrameAlloc(MI); break; - case TargetOpcode::EH_LABEL: case TargetOpcode::GC_LABEL: OutStreamer->EmitLabel(MI.getOperand(0).getMCSymbol()); @@ -1049,7 +1070,7 @@ void AsmPrinter::EmitFunctionBody() { EmittedInsts += NumInstsInFunction; MachineOptimizationRemarkAnalysis R(DEBUG_TYPE, "InstructionCount", - MF->getFunction()->getSubprogram(), + MF->getFunction().getSubprogram(), &MF->front()); R << ore::NV("NumInstructions", NumInstsInFunction) << " instructions in function"; @@ -1077,8 +1098,8 @@ void AsmPrinter::EmitFunctionBody() { } } - const Function *F = MF->getFunction(); - for (const auto &BB : *F) { + const Function &F = MF->getFunction(); + for (const auto &BB : F) { if (!BB.hasAddressTaken()) continue; MCSymbol *Sym = GetBlockAddressSymbol(&BB); @@ -1125,6 +1146,9 @@ void AsmPrinter::EmitFunctionBody() { HI.Handler->endFunction(MF); } + // Emit section containing stack size metadata. + emitStackSizeSection(*MF); + if (isVerbose()) OutStreamer->GetCommentOS() << "-- End function\n"; @@ -1380,6 +1404,16 @@ bool AsmPrinter::doFinalization(Module &M) { PtrSize); } + // Emit .note.GNU-split-stack and .note.GNU-no-split-stack sections if + // split-stack is used. 
+ if (TM.getTargetTriple().isOSBinFormatELF() && MMI->hasSplitStack()) { + OutStreamer->SwitchSection( + OutContext.getELFSection(".note.GNU-split-stack", ELF::SHT_PROGBITS, 0)); + if (MMI->hasNosplitStack()) + OutStreamer->SwitchSection( + OutContext.getELFSection(".note.GNU-no-split-stack", ELF::SHT_PROGBITS, 0)); + } + // If we don't have any trampolines, then we don't require stack memory // to be executable. Some targets have a directive to declare this. Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline"); @@ -1408,7 +1442,7 @@ MCSymbol *AsmPrinter::getCurExceptionSym() { void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { this->MF = &MF; // Get the function symbol. - CurrentFnSym = getSymbol(MF.getFunction()); + CurrentFnSym = getSymbol(&MF.getFunction()); CurrentFnSymForSize = CurrentFnSym; CurrentFnBegin = nullptr; CurExceptionSym = nullptr; @@ -1420,8 +1454,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { } ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE(); - if (isVerbose()) - LI = &getAnalysis<MachineLoopInfo>(); + LI = &getAnalysis<MachineLoopInfo>(); const TargetSubtargetInfo &STI = MF.getSubtarget(); EnablePrintSchedInfo = PrintSchedule.getNumOccurrences() @@ -1446,7 +1479,6 @@ namespace { /// representations of the constants in the constant pool MCP. This is /// used to print out constants which have been "spilled to memory" by /// the code generator. -/// void AsmPrinter::EmitConstantPool() { const MachineConstantPool *MCP = MF->getConstantPool(); const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants(); @@ -1526,7 +1558,6 @@ void AsmPrinter::EmitConstantPool() { /// EmitJumpTableInfo - Print assembly representations of the jump tables used /// by the current function to the current output stream. 
-/// void AsmPrinter::EmitJumpTableInfo() { const DataLayout &DL = MF->getDataLayout(); const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); @@ -1537,14 +1568,14 @@ void AsmPrinter::EmitJumpTableInfo() { // Pick the directive to use to print the jump table entries, and switch to // the appropriate section. - const Function *F = MF->getFunction(); + const Function &F = MF->getFunction(); const TargetLoweringObjectFile &TLOF = getObjFileLowering(); bool JTInDiffSection = !TLOF.shouldPutJumpTableInFunctionSection( MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32, - *F); + F); if (JTInDiffSection) { // Drop it in the readonly section. - MCSection *ReadOnlySection = TLOF.getSectionForJumpTable(*F, TM); + MCSection *ReadOnlySection = TLOF.getSectionForJumpTable(F, TM); OutStreamer->SwitchSection(ReadOnlySection); } @@ -1723,7 +1754,7 @@ struct Structor { Structor() = default; }; -} // end anonymous namespace +} // end anonymous namespace /// EmitXXStructorList - Emit the ctor or dtor list taking into account the init /// priority. @@ -1818,13 +1849,11 @@ void AsmPrinter::EmitInt8(int Value) const { } /// EmitInt16 - Emit a short directive and value. -/// void AsmPrinter::EmitInt16(int Value) const { OutStreamer->EmitIntValue(Value, 2); } /// EmitInt32 - Emit a long directive and value. -/// void AsmPrinter::EmitInt32(int Value) const { OutStreamer->EmitIntValue(Value, 4); } @@ -1866,7 +1895,6 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, // byte alignment. If a global value is specified, and if that global has // an explicit alignment requested, it will override the alignment request // if required for correctness. 
-// void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalObject *GV) const { if (GV) NumBits = getGVAlignmentLog2(GV, GV->getParent()->getDataLayout(), NumBits); @@ -1921,7 +1949,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { raw_string_ostream OS(S); OS << "Unsupported expression in static initializer: "; CE->printAsOperand(OS, /*PrintType=*/false, - !MF ? nullptr : MF->getFunction()->getParent()); + !MF ? nullptr : MF->getFunction().getParent()); report_fatal_error(OS.str()); } case Instruction::GetElementPtr: { @@ -2317,7 +2345,6 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME, // // cstexpr := <gotequiv> - <foo> + gotpcrelcst, where // gotpcrelcst := <offset from @foo base> + <cst> - // MCValue MV; if (!(*ME)->evaluateAsRelocatable(MV, nullptr, nullptr) || MV.isAbsolute()) return; @@ -2348,7 +2375,6 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME, // If gotpcrelcst is positive it means that we can safely fold the pc rel // displacement into the GOTPCREL. We can also can have an extra offset <cst> // if the target knows how to encode it. - // int64_t GOTPCRelCst = Offset + MV.getConstant(); if (GOTPCRelCst < 0) return; @@ -2370,7 +2396,6 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME, // .long 42 // foo: // .long bar@GOTPCREL+<gotpcrelcst> - // AsmPrinter::GOTEquivUsePair Result = AP.GlobalGOTEquivs[GOTEquivSym]; const GlobalVariable *GV = Result.first; int NumUses = (int)Result.second; @@ -2550,7 +2575,6 @@ static void PrintParentLoopComment(raw_ostream &OS, const MachineLoop *Loop, << " Depth=" << Loop->getLoopDepth() << '\n'; } - /// PrintChildLoopComment - Print comments about child loops within /// the loop for this basic block, with nesting. 
static void PrintChildLoopComment(raw_ostream &OS, const MachineLoop *Loop, @@ -2603,6 +2627,23 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB, PrintChildLoopComment(OS, Loop, AP.getFunctionNumber()); } +void AsmPrinter::setupCodePaddingContext(const MachineBasicBlock &MBB, + MCCodePaddingContext &Context) const { + assert(MF != nullptr && "Machine function must be valid"); + assert(LI != nullptr && "Loop info must be valid"); + Context.IsPaddingActive = !MF->hasInlineAsm() && + !MF->getFunction().optForSize() && + TM.getOptLevel() != CodeGenOpt::None; + const MachineLoop *CurrentLoop = LI->getLoopFor(&MBB); + Context.IsBasicBlockInsideInnermostLoop = + CurrentLoop != nullptr && CurrentLoop->getSubLoops().empty(); + Context.IsBasicBlockReachableViaFallthrough = + std::find(MBB.pred_begin(), MBB.pred_end(), MBB.getPrevNode()) != + MBB.pred_end(); + Context.IsBasicBlockReachableViaBranch = + MBB.pred_size() > 0 && !isBlockOnlyReachableByFallthrough(&MBB); +} + /// EmitBasicBlockStart - This method prints the label for the specified /// MachineBasicBlock, an alignment (if present) and a comment describing /// it if appropriate. @@ -2618,6 +2659,9 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const { // Emit an alignment directive for this block, if needed. if (unsigned Align = MBB.getAlignment()) EmitAlignment(Align); + MCCodePaddingContext Context; + setupCodePaddingContext(MBB, Context); + OutStreamer->EmitCodePaddingBasicBlockStart(Context); // If the block has its address taken, emit any labels that were used to // reference the block. It is possible that there is more than one label @@ -2652,13 +2696,20 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const { (isBlockOnlyReachableByFallthrough(&MBB) && !MBB.isEHFuncletEntry())) { if (isVerbose()) { // NOTE: Want this comment at start of line, don't emit with AddComment. 
- OutStreamer->emitRawComment(" BB#" + Twine(MBB.getNumber()) + ":", false); + OutStreamer->emitRawComment(" %bb." + Twine(MBB.getNumber()) + ":", + false); } } else { OutStreamer->EmitLabel(MBB.getSymbol()); } } +void AsmPrinter::EmitBasicBlockEnd(const MachineBasicBlock &MBB) { + MCCodePaddingContext Context; + setupCodePaddingContext(MBB, Context); + OutStreamer->EmitCodePaddingBasicBlockEnd(Context); +} + void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility, bool IsDefinition) const { MCSymbolAttr Attr = MCSA_Invalid; @@ -2765,10 +2816,13 @@ void AsmPrinter::XRayFunctionEntry::emit(int Bytes, MCStreamer *Out, Out->EmitSymbolValue(Sled, Bytes); Out->EmitSymbolValue(CurrentFnSym, Bytes); auto Kind8 = static_cast<uint8_t>(Kind); - Out->EmitBytes(StringRef(reinterpret_cast<const char *>(&Kind8), 1)); - Out->EmitBytes( + Out->EmitBinaryData(StringRef(reinterpret_cast<const char *>(&Kind8), 1)); + Out->EmitBinaryData( StringRef(reinterpret_cast<const char *>(&AlwaysInstrument), 1)); - Out->EmitZeros(2 * Bytes - 2); // Pad the previous two entries + Out->EmitBinaryData(StringRef(reinterpret_cast<const char *>(&Version), 1)); + auto Padding = (4 * Bytes) - ((2 * Bytes) + 3); + assert(Padding >= 0 && "Instrumentation map entry > 4 * Word Size"); + Out->EmitZeros(Padding); } void AsmPrinter::emitXRayTable() { @@ -2776,23 +2830,26 @@ void AsmPrinter::emitXRayTable() { return; auto PrevSection = OutStreamer->getCurrentSectionOnly(); - auto Fn = MF->getFunction(); + const Function &F = MF->getFunction(); MCSection *InstMap = nullptr; MCSection *FnSledIndex = nullptr; if (MF->getSubtarget().getTargetTriple().isOSBinFormatELF()) { - if (Fn->hasComdat()) { - InstMap = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_GROUP, 0, - Fn->getComdat()->getName()); - FnSledIndex = OutContext.getELFSection("xray_fn_idx", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_GROUP, 0, - Fn->getComdat()->getName()); - } else { - InstMap = 
OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC); - FnSledIndex = OutContext.getELFSection("xray_fn_idx", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC); + auto Associated = dyn_cast<MCSymbolELF>(CurrentFnSym); + assert(Associated != nullptr); + auto Flags = ELF::SHF_WRITE | ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER; + std::string GroupName; + if (F.hasComdat()) { + Flags |= ELF::SHF_GROUP; + GroupName = F.getComdat()->getName(); } + + auto UniqueID = ++XRayFnUniqueID; + InstMap = + OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, Flags, 0, + GroupName, UniqueID, Associated); + FnSledIndex = + OutContext.getELFSection("xray_fn_idx", ELF::SHT_PROGBITS, Flags, 0, + GroupName, UniqueID, Associated); } else if (MF->getSubtarget().getTargetTriple().isOSBinFormatMachO()) { InstMap = OutContext.getMachOSection("__DATA", "xray_instr_map", 0, SectionKind::getReadOnlyWithRel()); @@ -2802,15 +2859,7 @@ void AsmPrinter::emitXRayTable() { llvm_unreachable("Unsupported target"); } - // Before we switch over, we force a reference to a label inside the - // xray_fn_idx sections. This makes sure that the xray_fn_idx section is kept - // live by the linker if the function is not garbage-collected. Since this - // function is always called just before the function's end, we assume that - // this is happening after the last return instruction. auto WordSizeBytes = MAI->getCodePointerSize(); - MCSymbol *IdxRef = OutContext.createTempSymbol("xray_fn_idx_synth_", true); - OutStreamer->EmitCodeAlignment(16); - OutStreamer->EmitSymbolValue(IdxRef, WordSizeBytes, false); // Now we switch to the instrumentation map section. Because this is done // per-function, we are able to create an index entry that will represent the @@ -2829,24 +2878,23 @@ void AsmPrinter::emitXRayTable() { // pointers. This should work for both 32-bit and 64-bit platforms. 
OutStreamer->SwitchSection(FnSledIndex); OutStreamer->EmitCodeAlignment(2 * WordSizeBytes); - OutStreamer->EmitLabel(IdxRef); - OutStreamer->EmitSymbolValue(SledsStart, WordSizeBytes); - OutStreamer->EmitSymbolValue(SledsEnd, WordSizeBytes); + OutStreamer->EmitSymbolValue(SledsStart, WordSizeBytes, false); + OutStreamer->EmitSymbolValue(SledsEnd, WordSizeBytes, false); OutStreamer->SwitchSection(PrevSection); Sleds.clear(); } void AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI, - SledKind Kind) { - auto Fn = MI.getParent()->getParent()->getFunction(); - auto Attr = Fn->getFnAttribute("function-instrument"); - bool LogArgs = Fn->hasFnAttribute("xray-log-args"); + SledKind Kind, uint8_t Version) { + const Function &F = MI.getMF()->getFunction(); + auto Attr = F.getFnAttribute("function-instrument"); + bool LogArgs = F.hasFnAttribute("xray-log-args"); bool AlwaysInstrument = Attr.isStringAttribute() && Attr.getValueAsString() == "xray-always"; if (Kind == SledKind::FUNCTION_ENTER && LogArgs) Kind = SledKind::LOG_ARGS_ENTER; - Sleds.emplace_back( - XRayFunctionEntry{ Sled, CurrentFnSym, Kind, AlwaysInstrument, Fn }); + Sleds.emplace_back(XRayFunctionEntry{Sled, CurrentFnSym, Kind, + AlwaysInstrument, &F, Version}); } uint16_t AsmPrinter::getDwarfVersion() const { diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 0edf9051d342..08eb14e242c5 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -12,14 +12,12 @@ //===----------------------------------------------------------------------===// #include "ByteStreamer.h" -#include "DwarfDebug.h" -#include "DwarfExpression.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/DIE.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/TargetLoweringObjectFile.h" #include 
"llvm/IR/DataLayout.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCRegisterInfo.h" @@ -28,9 +26,7 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MachineLocation.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "asm-printer" @@ -48,12 +44,19 @@ void AsmPrinter::EmitSLEB128(int64_t Value, const char *Desc) const { } /// EmitULEB128 - emit the specified unsigned leb128 value. -void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc, - unsigned PadTo) const { +void AsmPrinter::EmitPaddedULEB128(uint64_t Value, unsigned PadTo, + const char *Desc) const { if (isVerbose() && Desc) OutStreamer->AddComment(Desc); - OutStreamer->EmitULEB128IntValue(Value, PadTo); + OutStreamer->EmitPaddedULEB128IntValue(Value, PadTo); +} + +void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc) const { + if (isVerbose() && Desc) + OutStreamer->AddComment(Desc); + + OutStreamer->EmitULEB128IntValue(Value); } static const char *DecodeDWARFEncoding(unsigned Encoding) { @@ -212,6 +215,9 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const { case MCCFIInstruction::OpEscape: OutStreamer->EmitCFIEscape(Inst.getValues()); break; + case MCCFIInstruction::OpRestore: + OutStreamer->EmitCFIRestore(Inst.getRegister()); + break; } } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index eae79ad101d3..04a72ba3d738 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -17,6 +17,9 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Constants.h" 
#include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" @@ -32,10 +35,7 @@ #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "asm-printer" @@ -144,6 +144,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, " we don't have an asm parser for this target\n"); Parser->setAssemblerDialect(Dialect); Parser->setTargetParser(*TAP.get()); + Parser->setEnablePrintSchedInfo(EnablePrintSchedInfo); if (Dialect == InlineAsm::AD_Intel) // We need this flag to be able to parse numbers like "0bH" Parser->setParsingInlineAsm(true); @@ -513,7 +514,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { // Reset SanitizeAddress based on the function's attribute. MCTargetOptions MCOptions = TM.Options.MCOptions; MCOptions.SanitizeAddress = - MF->getFunction()->hasFnAttribute(Attribute::SanitizeAddress); + MF->getFunction().hasFnAttribute(Attribute::SanitizeAddress); EmitInlineAsm(OS.str(), getSubtargetInfo(), MCOptions, LocMD, MI->getInlineAsmDialect()); diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index a81d56e9618b..1d0a003dc50a 100644 --- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "CodeViewDebug.h" +#include "DwarfExpression.h" #include "llvm/ADT/APSInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" @@ -19,9 +20,9 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" 
#include "llvm/ADT/StringRef.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/ADT/Triple.h" @@ -34,9 +35,14 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Config/llvm-config.h" #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" #include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h" #include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h" #include "llvm/DebugInfo/CodeView/Line.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" @@ -61,17 +67,15 @@ #include "llvm/Support/BinaryByteStream.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ScopedPrinter.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/SMLoc.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Support/ScopedPrinter.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cassert> #include <cctype> @@ -86,6 +90,9 @@ using namespace llvm; using namespace llvm::codeview; +static cl::opt<bool> EmitDebugGlobalHashes("emit-codeview-ghash-section", + cl::ReallyHidden, cl::init(false)); + CodeViewDebug::CodeViewDebug(AsmPrinter *AP) : DebugHandlerBase(AP), OS(*Asm->OutStreamer), TypeTable(Allocator) { // If module doesn't have named metadata anchors or COFF debug section @@ -153,12 +160,19 @@ StringRef CodeViewDebug::getFullFilepath(const DIFile 
*File) { } unsigned CodeViewDebug::maybeRecordFile(const DIFile *F) { + StringRef FullPath = getFullFilepath(F); unsigned NextId = FileIdMap.size() + 1; - auto Insertion = FileIdMap.insert(std::make_pair(F, NextId)); + auto Insertion = FileIdMap.insert(std::make_pair(FullPath, NextId)); if (Insertion.second) { // We have to compute the full filepath and emit a .cv_file directive. - StringRef FullPath = getFullFilepath(F); - bool Success = OS.EmitCVFileDirective(NextId, FullPath); + std::string Checksum = fromHex(F->getChecksum()); + void *CKMem = OS.getContext().allocate(Checksum.size(), 1); + memcpy(CKMem, Checksum.data(), Checksum.size()); + ArrayRef<uint8_t> ChecksumAsBytes(reinterpret_cast<const uint8_t *>(CKMem), + Checksum.size()); + DIFile::ChecksumKind ChecksumKind = F->getChecksumKind(); + bool Success = OS.EmitCVFileDirective(NextId, FullPath, ChecksumAsBytes, + static_cast<unsigned>(ChecksumKind)); (void)Success; assert(Success && ".cv_file directive failed"); } @@ -270,7 +284,7 @@ TypeIndex CodeViewDebug::getScopeIndex(const DIScope *Scope) { // Build the fully qualified name of the scope. std::string ScopeName = getFullyQualifiedName(Scope); StringIdRecord SID(TypeIndex(), ScopeName); - auto TI = TypeTable.writeKnownType(SID); + auto TI = TypeTable.writeLeafType(SID); return recordTypeIndexForDINode(Scope, TI); } @@ -295,12 +309,12 @@ TypeIndex CodeViewDebug::getFuncIdForSubprogram(const DISubprogram *SP) { TypeIndex ClassType = getTypeIndex(Class); MemberFuncIdRecord MFuncId(ClassType, getMemberFunctionType(SP, Class), DisplayName); - TI = TypeTable.writeKnownType(MFuncId); + TI = TypeTable.writeLeafType(MFuncId); } else { // Otherwise, this must be a free function. 
TypeIndex ParentScope = getScopeIndex(Scope); FuncIdRecord FuncId(ParentScope, getTypeIndex(SP->getType()), DisplayName); - TI = TypeTable.writeKnownType(FuncId); + TI = TypeTable.writeLeafType(FuncId); } return recordTypeIndexForDINode(SP, TI); @@ -324,8 +338,9 @@ TypeIndex CodeViewDebug::getMemberFunctionType(const DISubprogram *SP, // function type, as the complete class type is likely to reference this // member function type. TypeLoweringScope S(*this); - TypeIndex TI = - lowerTypeMemberFunction(SP->getType(), Class, SP->getThisAdjustment()); + const bool IsStaticMethod = (SP->getFlags() & DINode::FlagStaticMember) != 0; + TypeIndex TI = lowerTypeMemberFunction( + SP->getType(), Class, SP->getThisAdjustment(), IsStaticMethod); return recordTypeIndexForDINode(SP, TI, Class); } @@ -476,10 +491,13 @@ void CodeViewDebug::endModule() { OS.AddComment("String table"); OS.EmitCVStringTableDirective(); - // Emit type information last, so that any types we translate while emitting - // function info are included. + // Emit type information and hashes last, so that any types we translate while + // emitting function info are included. emitTypeInformation(); + if (EmitDebugGlobalHashes) + emitTypeGlobalHashes(); + clear(); } @@ -496,11 +514,6 @@ static void emitNullTerminatedSymbolName(MCStreamer &OS, StringRef S) { } void CodeViewDebug::emitTypeInformation() { - // Do nothing if we have no debug info or if no non-trivial types were emitted - // to TypeTable during codegen. - NamedMDNode *CU_Nodes = MMI->getModule()->getNamedMetadata("llvm.dbg.cu"); - if (!CU_Nodes) - return; if (TypeTable.empty()) return; @@ -545,7 +558,39 @@ void CodeViewDebug::emitTypeInformation() { } } -namespace { +void CodeViewDebug::emitTypeGlobalHashes() { + if (TypeTable.empty()) + return; + + // Start the .debug$H section with the version and hash algorithm, currently + // hardcoded to version 0, SHA1. 
+ OS.SwitchSection(Asm->getObjFileLowering().getCOFFGlobalTypeHashesSection()); + + OS.EmitValueToAlignment(4); + OS.AddComment("Magic"); + OS.EmitIntValue(COFF::DEBUG_HASHES_SECTION_MAGIC, 4); + OS.AddComment("Section Version"); + OS.EmitIntValue(0, 2); + OS.AddComment("Hash Algorithm"); + OS.EmitIntValue(uint16_t(GlobalTypeHashAlg::SHA1), 2); + + TypeIndex TI(TypeIndex::FirstNonSimpleIndex); + for (const auto &GHR : TypeTable.hashes()) { + if (OS.isVerboseAsm()) { + // Emit an EOL-comment describing which TypeIndex this hash corresponds + // to, as well as the stringified SHA1 hash. + SmallString<32> Comment; + raw_svector_ostream CommentOS(Comment); + CommentOS << formatv("{0:X+} [{1}]", TI.getIndex(), GHR); + OS.AddComment(Comment); + ++TI; + } + assert(GHR.Hash.size() % 20 == 0); + StringRef S(reinterpret_cast<const char *>(GHR.Hash.data()), + GHR.Hash.size()); + OS.EmitBinaryData(S); + } +} static SourceLanguage MapDWLangToCVLang(unsigned DWLang) { switch (DWLang) { @@ -572,6 +617,8 @@ static SourceLanguage MapDWLangToCVLang(unsigned DWLang) { return SourceLanguage::Cobol; case dwarf::DW_LANG_Java: return SourceLanguage::Java; + case dwarf::DW_LANG_D: + return SourceLanguage::D; default: // There's no CodeView representation for this language, and CV doesn't // have an "unknown" option for the language field, so we'll use MASM, @@ -580,9 +627,11 @@ static SourceLanguage MapDWLangToCVLang(unsigned DWLang) { } } +namespace { struct Version { int Part[4]; }; +} // end anonymous namespace // Takes a StringRef like "clang 4.0.0.0 (other nonsense 123)" and parses out // the version number. 
@@ -605,20 +654,19 @@ static Version parseVersion(StringRef Name) { static CPUType mapArchToCVCPUType(Triple::ArchType Type) { switch (Type) { - case Triple::ArchType::x86: - return CPUType::Pentium3; - case Triple::ArchType::x86_64: - return CPUType::X64; - case Triple::ArchType::thumb: - return CPUType::Thumb; - default: - report_fatal_error("target architecture doesn't map to a CodeView " - "CPUType"); + case Triple::ArchType::x86: + return CPUType::Pentium3; + case Triple::ArchType::x86_64: + return CPUType::X64; + case Triple::ArchType::thumb: + return CPUType::Thumb; + case Triple::ArchType::aarch64: + return CPUType::ARM64; + default: + report_fatal_error("target architecture doesn't map to a CodeView CPUType"); } } -} // end anonymous namespace - void CodeViewDebug::emitCompilerInformation() { MCContext &Context = MMI->getContext(); MCSymbol *CompilerBegin = Context.createTempSymbol(), @@ -678,8 +726,10 @@ void CodeViewDebug::emitInlineeLinesSubsection() { OS.AddComment("Inlinee lines subsection"); MCSymbol *InlineEnd = beginCVSubsection(DebugSubsectionKind::InlineeLines); - // We don't provide any extra file info. - // FIXME: Find out if debuggers use this info. + // We emit the checksum info for files. This is used by debuggers to + // determine if a pdb matches the source before loading it. Visual Studio, + // for instance, will display a warning that the breakpoints are not valid if + // the pdb does not match the source. OS.AddComment("Inlinee lines signature"); OS.EmitIntValue(unsigned(InlineeLinesSignature::Normal), 4); @@ -692,13 +742,10 @@ void CodeViewDebug::emitInlineeLinesSubsection() { OS.AddComment("Inlined function " + SP->getName() + " starts at " + SP->getFilename() + Twine(':') + Twine(SP->getLine())); OS.AddBlankLine(); - // The filechecksum table uses 8 byte entries for now, and file ids start at - // 1. 
- unsigned FileOffset = (FileId - 1) * 8; OS.AddComment("Type index of inlined function"); OS.EmitIntValue(InlineeIdx.getIndex(), 4); OS.AddComment("Offset into filechecksum table"); - OS.EmitIntValue(FileOffset, 4); + OS.EmitCVFileChecksumOffsetDirective(FileId); OS.AddComment("Starting line number"); OS.EmitIntValue(SP->getLine(), 4); } @@ -799,6 +846,10 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, if (FuncName.empty()) FuncName = GlobalValue::dropLLVMManglingEscape(GV->getName()); + // Emit FPO data, but only on 32-bit x86. No other platforms use it. + if (Triple(MMI->getModule()->getTargetTriple()).getArch() == Triple::x86) + OS.EmitCVFPOData(Fn); + // Emit a symbol subsection, required by VS2012+ to find function boundaries. OS.AddComment("Symbol subsection for " + Twine(FuncName)); MCSymbol *SymbolsEnd = beginCVSubsection(DebugSubsectionKind::Symbols); @@ -858,6 +909,30 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, emitInlinedCallSite(FI, InlinedAt, I->second); } + for (auto Annot : FI.Annotations) { + MCSymbol *Label = Annot.first; + MDTuple *Strs = cast<MDTuple>(Annot.second); + MCSymbol *AnnotBegin = MMI->getContext().createTempSymbol(), + *AnnotEnd = MMI->getContext().createTempSymbol(); + OS.AddComment("Record length"); + OS.emitAbsoluteSymbolDiff(AnnotEnd, AnnotBegin, 2); + OS.EmitLabel(AnnotBegin); + OS.AddComment("Record kind: S_ANNOTATION"); + OS.EmitIntValue(SymbolKind::S_ANNOTATION, 2); + OS.EmitCOFFSecRel32(Label, /*Offset=*/0); + // FIXME: Make sure we don't overflow the max record size. + OS.EmitCOFFSectionIndex(Label); + OS.EmitIntValue(Strs->getNumOperands(), 2); + for (Metadata *MD : Strs->operands()) { + // MDStrings are null terminated, so we can do EmitBytes and get the + // nice .asciz directive. 
+ StringRef Str = cast<MDString>(MD)->getString(); + assert(Str.data()[Str.size()] == '\0' && "non-nullterminated MDString"); + OS.EmitBytes(StringRef(Str.data(), Str.size() + 1)); + } + OS.EmitLabel(AnnotEnd); + } + if (SP != nullptr) emitDebugInfoForUDTs(LocalUDTs); @@ -947,13 +1022,110 @@ void CodeViewDebug::collectVariableInfoFromMFTable( } } +static bool canUseReferenceType(const DbgVariableLocation &Loc) { + return !Loc.LoadChain.empty() && Loc.LoadChain.back() == 0; +} + +static bool needsReferenceType(const DbgVariableLocation &Loc) { + return Loc.LoadChain.size() == 2 && Loc.LoadChain.back() == 0; +} + +void CodeViewDebug::calculateRanges( + LocalVariable &Var, const DbgValueHistoryMap::InstrRanges &Ranges) { + const TargetRegisterInfo *TRI = Asm->MF->getSubtarget().getRegisterInfo(); + + // Calculate the definition ranges. + for (auto I = Ranges.begin(), E = Ranges.end(); I != E; ++I) { + const InsnRange &Range = *I; + const MachineInstr *DVInst = Range.first; + assert(DVInst->isDebugValue() && "Invalid History entry"); + // FIXME: Find a way to represent constant variables, since they are + // relatively common. + Optional<DbgVariableLocation> Location = + DbgVariableLocation::extractFromMachineInstruction(*DVInst); + if (!Location) + continue; + + // CodeView can only express variables in register and variables in memory + // at a constant offset from a register. However, for variables passed + // indirectly by pointer, it is common for that pointer to be spilled to a + // stack location. For the special case of one offseted load followed by a + // zero offset load (a pointer spilled to the stack), we change the type of + // the local variable from a value type to a reference type. This tricks the + // debugger into doing the load for us. + if (Var.UseReferenceType) { + // We're using a reference type. Drop the last zero offset load. 
+ if (canUseReferenceType(*Location)) + Location->LoadChain.pop_back(); + else + continue; + } else if (needsReferenceType(*Location)) { + // This location can't be expressed without switching to a reference type. + // Start over using that. + Var.UseReferenceType = true; + Var.DefRanges.clear(); + calculateRanges(Var, Ranges); + return; + } + + // We can only handle a register or an offseted load of a register. + if (Location->Register == 0 || Location->LoadChain.size() > 1) + continue; + { + LocalVarDefRange DR; + DR.CVRegister = TRI->getCodeViewRegNum(Location->Register); + DR.InMemory = !Location->LoadChain.empty(); + DR.DataOffset = + !Location->LoadChain.empty() ? Location->LoadChain.back() : 0; + if (Location->FragmentInfo) { + DR.IsSubfield = true; + DR.StructOffset = Location->FragmentInfo->OffsetInBits / 8; + } else { + DR.IsSubfield = false; + DR.StructOffset = 0; + } + + if (Var.DefRanges.empty() || + Var.DefRanges.back().isDifferentLocation(DR)) { + Var.DefRanges.emplace_back(std::move(DR)); + } + } + + // Compute the label range. + const MCSymbol *Begin = getLabelBeforeInsn(Range.first); + const MCSymbol *End = getLabelAfterInsn(Range.second); + if (!End) { + // This range is valid until the next overlapping bitpiece. In the + // common case, ranges will not be bitpieces, so they will overlap. + auto J = std::next(I); + const DIExpression *DIExpr = DVInst->getDebugExpression(); + while (J != E && + !fragmentsOverlap(DIExpr, J->first->getDebugExpression())) + ++J; + if (J != E) + End = getLabelBeforeInsn(J->first); + else + End = Asm->getFunctionEnd(); + } + + // If the last range end is our begin, just extend the last range. + // Otherwise make a new range. + SmallVectorImpl<std::pair<const MCSymbol *, const MCSymbol *>> &R = + Var.DefRanges.back().Ranges; + if (!R.empty() && R.back().second == Begin) + R.back().second = End; + else + R.emplace_back(Begin, End); + + // FIXME: Do more range combining. 
+ } +} + void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) { DenseSet<InlinedVariable> Processed; // Grab the variable info that was squirreled away in the MMI side-table. collectVariableInfoFromMFTable(Processed); - const TargetRegisterInfo *TRI = Asm->MF->getSubtarget().getRegisterInfo(); - for (const auto &I : DbgValues) { InlinedVariable IV = I.first; if (Processed.count(IV)) @@ -976,87 +1148,15 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) { LocalVariable Var; Var.DIVar = DIVar; - // Calculate the definition ranges. - for (auto I = Ranges.begin(), E = Ranges.end(); I != E; ++I) { - const InsnRange &Range = *I; - const MachineInstr *DVInst = Range.first; - assert(DVInst->isDebugValue() && "Invalid History entry"); - const DIExpression *DIExpr = DVInst->getDebugExpression(); - bool IsSubfield = false; - unsigned StructOffset = 0; - - // Handle fragments. - auto Fragment = DIExpr->getFragmentInfo(); - if (Fragment) { - IsSubfield = true; - StructOffset = Fragment->OffsetInBits / 8; - } else if (DIExpr->getNumElements() > 0) { - continue; // Ignore unrecognized exprs. - } - - // Bail if operand 0 is not a valid register. This means the variable is a - // simple constant, or is described by a complex expression. - // FIXME: Find a way to represent constant variables, since they are - // relatively common. - unsigned Reg = - DVInst->getOperand(0).isReg() ? DVInst->getOperand(0).getReg() : 0; - if (Reg == 0) - continue; - - // Handle the two cases we can handle: indirect in memory and in register. - unsigned CVReg = TRI->getCodeViewRegNum(Reg); - bool InMemory = DVInst->getOperand(1).isImm(); - int Offset = InMemory ? 
DVInst->getOperand(1).getImm() : 0; - { - LocalVarDefRange DR; - DR.CVRegister = CVReg; - DR.InMemory = InMemory; - DR.DataOffset = Offset; - DR.IsSubfield = IsSubfield; - DR.StructOffset = StructOffset; - - if (Var.DefRanges.empty() || - Var.DefRanges.back().isDifferentLocation(DR)) { - Var.DefRanges.emplace_back(std::move(DR)); - } - } - - // Compute the label range. - const MCSymbol *Begin = getLabelBeforeInsn(Range.first); - const MCSymbol *End = getLabelAfterInsn(Range.second); - if (!End) { - // This range is valid until the next overlapping bitpiece. In the - // common case, ranges will not be bitpieces, so they will overlap. - auto J = std::next(I); - while (J != E && - !fragmentsOverlap(DIExpr, J->first->getDebugExpression())) - ++J; - if (J != E) - End = getLabelBeforeInsn(J->first); - else - End = Asm->getFunctionEnd(); - } - - // If the last range end is our begin, just extend the last range. - // Otherwise make a new range. - SmallVectorImpl<std::pair<const MCSymbol *, const MCSymbol *>> &Ranges = - Var.DefRanges.back().Ranges; - if (!Ranges.empty() && Ranges.back().second == Begin) - Ranges.back().second = End; - else - Ranges.emplace_back(Begin, End); - - // FIXME: Do more range combining. - } - + calculateRanges(Var, Ranges); recordLocalVariable(std::move(Var), InlinedAt); } } void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) { - const Function *GV = MF->getFunction(); - assert(FnDebugInfo.count(GV) == false); - CurFn = &FnDebugInfo[GV]; + const Function &GV = MF->getFunction(); + assert(FnDebugInfo.count(&GV) == false); + CurFn = &FnDebugInfo[&GV]; CurFn->FuncId = NextFuncId++; CurFn->Begin = Asm->getFunctionBegin(); @@ -1087,10 +1187,40 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) { } } -void CodeViewDebug::addToUDTs(const DIType *Ty, TypeIndex TI) { +static bool shouldEmitUdt(const DIType *T) { + if (!T) + return false; + + // MSVC does not emit UDTs for typedefs that are scoped to classes. 
+ if (T->getTag() == dwarf::DW_TAG_typedef) { + if (DIScope *Scope = T->getScope().resolve()) { + switch (Scope->getTag()) { + case dwarf::DW_TAG_structure_type: + case dwarf::DW_TAG_class_type: + case dwarf::DW_TAG_union_type: + return false; + } + } + } + + while (true) { + if (!T || T->isForwardDecl()) + return false; + + const DIDerivedType *DT = dyn_cast<DIDerivedType>(T); + if (!DT) + return true; + T = DT->getBaseType().resolve(); + } + return true; +} + +void CodeViewDebug::addToUDTs(const DIType *Ty) { // Don't record empty UDTs. if (Ty->getName().empty()) return; + if (!shouldEmitUdt(Ty)) + return; SmallVector<StringRef, 5> QualifiedNameComponents; const DISubprogram *ClosestSubprogram = getQualifiedNameComponents( @@ -1099,10 +1229,11 @@ void CodeViewDebug::addToUDTs(const DIType *Ty, TypeIndex TI) { std::string FullyQualifiedName = getQualifiedName(QualifiedNameComponents, getPrettyScopeName(Ty)); - if (ClosestSubprogram == nullptr) - GlobalUDTs.emplace_back(std::move(FullyQualifiedName), TI); - else if (ClosestSubprogram == CurrentSubprogram) - LocalUDTs.emplace_back(std::move(FullyQualifiedName), TI); + if (ClosestSubprogram == nullptr) { + GlobalUDTs.emplace_back(std::move(FullyQualifiedName), Ty); + } else if (ClosestSubprogram == CurrentSubprogram) { + LocalUDTs.emplace_back(std::move(FullyQualifiedName), Ty); + } // TODO: What if the ClosestSubprogram is neither null or the current // subprogram? Currently, the UDT just gets dropped on the floor. @@ -1139,7 +1270,8 @@ TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) { // The member function type of a member function pointer has no // ThisAdjustment. 
return lowerTypeMemberFunction(cast<DISubroutineType>(Ty), ClassTy, - /*ThisAdjustment=*/0); + /*ThisAdjustment=*/0, + /*IsStaticMethod=*/false); } return lowerTypeFunction(cast<DISubroutineType>(Ty)); case dwarf::DW_TAG_enumeration_type: @@ -1160,7 +1292,7 @@ TypeIndex CodeViewDebug::lowerTypeAlias(const DIDerivedType *Ty) { TypeIndex UnderlyingTypeIndex = getTypeIndex(UnderlyingTypeRef); StringRef TypeName = Ty->getName(); - addToUDTs(Ty, UnderlyingTypeIndex); + addToUDTs(Ty); if (UnderlyingTypeIndex == TypeIndex(SimpleTypeKind::Int32Long) && TypeName == "HRESULT") @@ -1193,11 +1325,12 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) { "codeview doesn't support subranges with lower bounds"); int64_t Count = Subrange->getCount(); - // Variable Length Array (VLA) has Count equal to '-1'. - // Replace with Count '1', assume it is the minimum VLA length. - // FIXME: Make front-end support VLA subrange and emit LF_DIMVARLU. + // Forward declarations of arrays without a size and VLAs use a count of -1. + // Emit a count of zero in these cases to match what MSVC does for arrays + // without a size. MSVC doesn't support VLAs, so it's not clear what we + // should do for them even if we could distinguish them. if (Count == -1) - Count = 1; + Count = 0; // Update the element size and element type index for subsequent subranges. ElementSize *= Count; @@ -1209,7 +1342,7 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) { StringRef Name = (i == 0) ? Ty->getName() : ""; ArrayRecord AR(ElementTypeIndex, IndexType, ArraySize, Name); - ElementTypeIndex = TypeTable.writeKnownType(AR); + ElementTypeIndex = TypeTable.writeLeafType(AR); } return ElementTypeIndex; @@ -1342,7 +1475,7 @@ TypeIndex CodeViewDebug::lowerTypePointer(const DIDerivedType *Ty) { // do. 
PointerOptions PO = PointerOptions::None; PointerRecord PR(PointeeTI, PK, PM, PO, Ty->getSizeInBits() / 8); - return TypeTable.writeKnownType(PR); + return TypeTable.writeLeafType(PR); } static PointerToMemberRepresentation @@ -1393,7 +1526,7 @@ TypeIndex CodeViewDebug::lowerTypeMemberPointer(const DIDerivedType *Ty) { MemberPointerInfo MPI( ClassTI, translatePtrToMemberRep(SizeInBytes, IsPMF, Ty->getFlags())); PointerRecord PR(PointeeTI, PK, PM, PO, SizeInBytes, MPI); - return TypeTable.writeKnownType(PR); + return TypeTable.writeLeafType(PR); } /// Given a DWARF calling convention, get the CodeView equivalent. If we don't @@ -1432,7 +1565,7 @@ TypeIndex CodeViewDebug::lowerTypeModifier(const DIDerivedType *Ty) { } TypeIndex ModifiedTI = getTypeIndex(BaseTy); ModifierRecord MR(ModifiedTI, Mods); - return TypeTable.writeKnownType(MR); + return TypeTable.writeLeafType(MR); } TypeIndex CodeViewDebug::lowerTypeFunction(const DISubroutineType *Ty) { @@ -1449,18 +1582,19 @@ TypeIndex CodeViewDebug::lowerTypeFunction(const DISubroutineType *Ty) { } ArgListRecord ArgListRec(TypeRecordKind::ArgList, ArgTypeIndices); - TypeIndex ArgListIndex = TypeTable.writeKnownType(ArgListRec); + TypeIndex ArgListIndex = TypeTable.writeLeafType(ArgListRec); CallingConvention CC = dwarfCCToCodeView(Ty->getCC()); ProcedureRecord Procedure(ReturnTypeIndex, CC, FunctionOptions::None, ArgTypeIndices.size(), ArgListIndex); - return TypeTable.writeKnownType(Procedure); + return TypeTable.writeLeafType(Procedure); } TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty, const DIType *ClassTy, - int ThisAdjustment) { + int ThisAdjustment, + bool IsStaticMethod) { // Lower the containing class type. 
TypeIndex ClassType = getTypeIndex(ClassTy); @@ -1475,26 +1609,22 @@ TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty, ReturnTypeIndex = ReturnAndArgTypesRef.front(); ArgTypeIndices = ReturnAndArgTypesRef.drop_front(); } - TypeIndex ThisTypeIndex = TypeIndex::Void(); - if (!ArgTypeIndices.empty()) { + TypeIndex ThisTypeIndex; + if (!IsStaticMethod && !ArgTypeIndices.empty()) { ThisTypeIndex = ArgTypeIndices.front(); ArgTypeIndices = ArgTypeIndices.drop_front(); } ArgListRecord ArgListRec(TypeRecordKind::ArgList, ArgTypeIndices); - TypeIndex ArgListIndex = TypeTable.writeKnownType(ArgListRec); + TypeIndex ArgListIndex = TypeTable.writeLeafType(ArgListRec); CallingConvention CC = dwarfCCToCodeView(Ty->getCC()); - // TODO: Need to use the correct values for: - // FunctionOptions - // ThisPointerAdjustment. + // TODO: Need to use the correct values for FunctionOptions. MemberFunctionRecord MFR(ReturnTypeIndex, ClassType, ThisTypeIndex, CC, FunctionOptions::None, ArgTypeIndices.size(), ArgListIndex, ThisAdjustment); - TypeIndex TI = TypeTable.writeKnownType(MFR); - - return TI; + return TypeTable.writeLeafType(MFR); } TypeIndex CodeViewDebug::lowerTypeVFTableShape(const DIDerivedType *Ty) { @@ -1503,7 +1633,7 @@ TypeIndex CodeViewDebug::lowerTypeVFTableShape(const DIDerivedType *Ty) { SmallVector<VFTableSlotKind, 4> Slots(VSlotCount, VFTableSlotKind::Near); VFTableShapeRecord VFTSR(Slots); - return TypeTable.writeKnownType(VFTSR); + return TypeTable.writeLeafType(VFTSR); } static MemberAccess translateAccessFlags(unsigned RecordTag, unsigned Flags) { @@ -1530,6 +1660,9 @@ static MethodOptions translateMethodOptionFlags(const DISubprogram *SP) { static MethodKind translateMethodKindFlags(const DISubprogram *SP, bool Introduced) { + if (SP->getFlags() & DINode::FlagStaticMember) + return MethodKind::Static; + switch (SP->getVirtuality()) { case dwarf::DW_VIRTUALITY_none: break; @@ -1542,8 +1675,6 @@ static MethodKind 
translateMethodKindFlags(const DISubprogram *SP, llvm_unreachable("unhandled virtuality case"); } - // FIXME: Get Clang to mark DISubprogram as static and do something with it. - return MethodKind::Vanilla; } @@ -1593,9 +1724,8 @@ TypeIndex CodeViewDebug::lowerTypeEnum(const DICompositeType *Ty) { if (Ty->isForwardDecl()) { CO |= ClassOptions::ForwardReference; } else { - FieldListRecordBuilder FLRB(TypeTable); - - FLRB.begin(); + ContinuationRecordBuilder ContinuationBuilder; + ContinuationBuilder.begin(ContinuationRecordKind::FieldList); for (const DINode *Element : Ty->getElements()) { // We assume that the frontend provides all members in source declaration // order, which is what MSVC does. @@ -1603,18 +1733,18 @@ TypeIndex CodeViewDebug::lowerTypeEnum(const DICompositeType *Ty) { EnumeratorRecord ER(MemberAccess::Public, APSInt::getUnsigned(Enumerator->getValue()), Enumerator->getName()); - FLRB.writeMemberType(ER); + ContinuationBuilder.writeMemberType(ER); EnumeratorCount++; } } - FTI = FLRB.end(true); + FTI = TypeTable.insertRecord(ContinuationBuilder); } std::string FullName = getFullyQualifiedName(Ty); EnumRecord ER(EnumeratorCount, CO, FTI, FullName, Ty->getIdentifier(), getTypeIndex(Ty->getBaseType())); - return TypeTable.writeKnownType(ER); + return TypeTable.writeLeafType(ER); } //===----------------------------------------------------------------------===// @@ -1643,7 +1773,7 @@ struct llvm::ClassInfo { TypeIndex VShapeTI; - std::vector<const DICompositeType *> NestedClasses; + std::vector<const DIType *> NestedTypes; }; void CodeViewDebug::clear() { @@ -1694,12 +1824,14 @@ ClassInfo CodeViewDebug::collectClassInfo(const DICompositeType *Ty) { } else if (DDTy->getTag() == dwarf::DW_TAG_pointer_type && DDTy->getName() == "__vtbl_ptr_type") { Info.VShapeTI = getTypeIndex(DDTy); + } else if (DDTy->getTag() == dwarf::DW_TAG_typedef) { + Info.NestedTypes.push_back(DDTy); } else if (DDTy->getTag() == dwarf::DW_TAG_friend) { // Ignore friend members. 
It appears that MSVC emitted info about // friends in the past, but modern versions do not. } } else if (auto *Composite = dyn_cast<DICompositeType>(Element)) { - Info.NestedClasses.push_back(Composite); + Info.NestedTypes.push_back(Composite); } // Skip other unrecognized kinds of elements. } @@ -1715,7 +1847,7 @@ TypeIndex CodeViewDebug::lowerTypeClass(const DICompositeType *Ty) { std::string FullName = getFullyQualifiedName(Ty); ClassRecord CR(Kind, 0, CO, TypeIndex(), TypeIndex(), TypeIndex(), 0, FullName, Ty->getIdentifier()); - TypeIndex FwdDeclTI = TypeTable.writeKnownType(CR); + TypeIndex FwdDeclTI = TypeTable.writeLeafType(CR); if (!Ty->isForwardDecl()) DeferredCompleteTypes.push_back(Ty); return FwdDeclTI; @@ -1741,16 +1873,17 @@ TypeIndex CodeViewDebug::lowerCompleteTypeClass(const DICompositeType *Ty) { ClassRecord CR(Kind, FieldCount, CO, FieldTI, TypeIndex(), VShapeTI, SizeInBytes, FullName, Ty->getIdentifier()); - TypeIndex ClassTI = TypeTable.writeKnownType(CR); + TypeIndex ClassTI = TypeTable.writeLeafType(CR); if (const auto *File = Ty->getFile()) { StringIdRecord SIDR(TypeIndex(0x0), getFullFilepath(File)); - TypeIndex SIDI = TypeTable.writeKnownType(SIDR); + TypeIndex SIDI = TypeTable.writeLeafType(SIDR); + UdtSourceLineRecord USLR(ClassTI, SIDI, Ty->getLine()); - TypeTable.writeKnownType(USLR); + TypeTable.writeLeafType(USLR); } - addToUDTs(Ty, ClassTI); + addToUDTs(Ty); return ClassTI; } @@ -1760,7 +1893,7 @@ TypeIndex CodeViewDebug::lowerTypeUnion(const DICompositeType *Ty) { ClassOptions::ForwardReference | getCommonClassOptions(Ty); std::string FullName = getFullyQualifiedName(Ty); UnionRecord UR(0, CO, TypeIndex(), 0, FullName, Ty->getIdentifier()); - TypeIndex FwdDeclTI = TypeTable.writeKnownType(UR); + TypeIndex FwdDeclTI = TypeTable.writeLeafType(UR); if (!Ty->isForwardDecl()) DeferredCompleteTypes.push_back(Ty); return FwdDeclTI; @@ -1782,14 +1915,15 @@ TypeIndex CodeViewDebug::lowerCompleteTypeUnion(const DICompositeType *Ty) { 
UnionRecord UR(FieldCount, CO, FieldTI, SizeInBytes, FullName, Ty->getIdentifier()); - TypeIndex UnionTI = TypeTable.writeKnownType(UR); + TypeIndex UnionTI = TypeTable.writeLeafType(UR); StringIdRecord SIR(TypeIndex(0x0), getFullFilepath(Ty->getFile())); - TypeIndex SIRI = TypeTable.writeKnownType(SIR); + TypeIndex SIRI = TypeTable.writeLeafType(SIR); + UdtSourceLineRecord USLR(UnionTI, SIRI, Ty->getLine()); - TypeTable.writeKnownType(USLR); + TypeTable.writeLeafType(USLR); - addToUDTs(Ty, UnionTI); + addToUDTs(Ty); return UnionTI; } @@ -1802,8 +1936,8 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) { // list record. unsigned MemberCount = 0; ClassInfo Info = collectClassInfo(Ty); - FieldListRecordBuilder FLBR(TypeTable); - FLBR.begin(); + ContinuationRecordBuilder ContinuationBuilder; + ContinuationBuilder.begin(ContinuationRecordKind::FieldList); // Create base classes. for (const DIDerivedType *I : Info.Inheritance) { @@ -1821,14 +1955,14 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) { getTypeIndex(I->getBaseType()), getVBPTypeIndex(), VBPtrOffset, VBTableIndex); - FLBR.writeMemberType(VBCR); + ContinuationBuilder.writeMemberType(VBCR); } else { assert(I->getOffsetInBits() % 8 == 0 && "bases must be on byte boundaries"); BaseClassRecord BCR(translateAccessFlags(Ty->getTag(), I->getFlags()), getTypeIndex(I->getBaseType()), I->getOffsetInBits() / 8); - FLBR.writeMemberType(BCR); + ContinuationBuilder.writeMemberType(BCR); } } @@ -1842,7 +1976,7 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) { if (Member->isStaticMember()) { StaticDataMemberRecord SDMR(Access, MemberBaseType, MemberName); - FLBR.writeMemberType(SDMR); + ContinuationBuilder.writeMemberType(SDMR); MemberCount++; continue; } @@ -1851,7 +1985,7 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) { if ((Member->getFlags() & DINode::FlagArtificial) && Member->getName().startswith("_vptr$")) { VFPtrRecord 
VFPR(getTypeIndex(Member->getBaseType())); - FLBR.writeMemberType(VFPR); + ContinuationBuilder.writeMemberType(VFPR); MemberCount++; continue; } @@ -1868,12 +2002,12 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) { StartBitOffset -= MemberOffsetInBits; BitFieldRecord BFR(MemberBaseType, Member->getSizeInBits(), StartBitOffset); - MemberBaseType = TypeTable.writeKnownType(BFR); + MemberBaseType = TypeTable.writeLeafType(BFR); } uint64_t MemberOffsetInBytes = MemberOffsetInBits / 8; DataMemberRecord DMR(Access, MemberBaseType, MemberOffsetInBytes, MemberName); - FLBR.writeMemberType(DMR); + ContinuationBuilder.writeMemberType(DMR); MemberCount++; } @@ -1898,40 +2032,42 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) { } assert(!Methods.empty() && "Empty methods map entry"); if (Methods.size() == 1) - FLBR.writeMemberType(Methods[0]); + ContinuationBuilder.writeMemberType(Methods[0]); else { + // FIXME: Make this use its own ContinuationBuilder so that + // MethodOverloadList can be split correctly. MethodOverloadListRecord MOLR(Methods); - TypeIndex MethodList = TypeTable.writeKnownType(MOLR); + TypeIndex MethodList = TypeTable.writeLeafType(MOLR); + OverloadedMethodRecord OMR(Methods.size(), MethodList, Name); - FLBR.writeMemberType(OMR); + ContinuationBuilder.writeMemberType(OMR); } } // Create nested classes. - for (const DICompositeType *Nested : Info.NestedClasses) { + for (const DIType *Nested : Info.NestedTypes) { NestedTypeRecord R(getTypeIndex(DITypeRef(Nested)), Nested->getName()); - FLBR.writeMemberType(R); + ContinuationBuilder.writeMemberType(R); MemberCount++; } - TypeIndex FieldTI = FLBR.end(true); + TypeIndex FieldTI = TypeTable.insertRecord(ContinuationBuilder); return std::make_tuple(FieldTI, Info.VShapeTI, MemberCount, - !Info.NestedClasses.empty()); + !Info.NestedTypes.empty()); } TypeIndex CodeViewDebug::getVBPTypeIndex() { if (!VBPType.getIndex()) { // Make a 'const int *' type. 
ModifierRecord MR(TypeIndex::Int32(), ModifierOptions::Const); - TypeIndex ModifiedTI = TypeTable.writeKnownType(MR); + TypeIndex ModifiedTI = TypeTable.writeLeafType(MR); PointerKind PK = getPointerSizeInBytes() == 8 ? PointerKind::Near64 : PointerKind::Near32; PointerMode PM = PointerMode::Pointer; PointerOptions PO = PointerOptions::None; PointerRecord PR(ModifiedTI, PK, PM, PO, getPointerSizeInBytes()); - - VBPType = TypeTable.writeKnownType(PR); + VBPType = TypeTable.writeLeafType(PR); } return VBPType; @@ -1957,6 +2093,16 @@ TypeIndex CodeViewDebug::getTypeIndex(DITypeRef TypeRef, DITypeRef ClassTyRef) { return recordTypeIndexForDINode(Ty, TI, ClassTy); } +TypeIndex CodeViewDebug::getTypeIndexForReferenceTo(DITypeRef TypeRef) { + DIType *Ty = TypeRef.resolve(); + PointerRecord PR(getTypeIndex(Ty), + getPointerSizeInBytes() == 8 ? PointerKind::Near64 + : PointerKind::Near32, + PointerMode::LValueReference, PointerOptions::None, + Ty->getSizeInBits() / 8); + return TypeTable.writeLeafType(PR); +} + TypeIndex CodeViewDebug::getCompleteTypeIndex(DITypeRef TypeRef) { const DIType *Ty = TypeRef.resolve(); @@ -2064,7 +2210,9 @@ void CodeViewDebug::emitLocalVariable(const LocalVariable &Var) { Flags |= LocalSymFlags::IsOptimizedOut; OS.AddComment("TypeIndex"); - TypeIndex TI = getCompleteTypeIndex(Var.DIVar->getType()); + TypeIndex TI = Var.UseReferenceType + ? 
getTypeIndexForReferenceTo(Var.DIVar->getType()) + : getCompleteTypeIndex(Var.DIVar->getType()); OS.EmitIntValue(TI.getIndex(), 4); OS.AddComment("Flags"); OS.EmitIntValue(static_cast<uint16_t>(Flags), 2); @@ -2125,19 +2273,21 @@ void CodeViewDebug::emitLocalVariable(const LocalVariable &Var) { } void CodeViewDebug::endFunctionImpl(const MachineFunction *MF) { - const Function *GV = MF->getFunction(); - assert(FnDebugInfo.count(GV)); - assert(CurFn == &FnDebugInfo[GV]); + const Function &GV = MF->getFunction(); + assert(FnDebugInfo.count(&GV)); + assert(CurFn == &FnDebugInfo[&GV]); - collectVariableInfo(GV->getSubprogram()); + collectVariableInfo(GV.getSubprogram()); // Don't emit anything if we don't have any line tables. if (!CurFn->HaveLineInfo) { - FnDebugInfo.erase(GV); + FnDebugInfo.erase(&GV); CurFn = nullptr; return; } + CurFn->Annotations = MF->getCodeViewAnnotations(); + CurFn->End = Asm->getFunctionEnd(); CurFn = nullptr; @@ -2156,6 +2306,8 @@ void CodeViewDebug::beginInstruction(const MachineInstr *MI) { DebugLoc DL = MI->getDebugLoc(); if (!DL && MI->getParent() != PrevInstBB) { for (const auto &NextMI : *MI->getParent()) { + if (NextMI.isDebugValue()) + continue; DL = NextMI.getDebugLoc(); if (DL) break; @@ -2187,8 +2339,11 @@ void CodeViewDebug::endCVSubsection(MCSymbol *EndLabel) { } void CodeViewDebug::emitDebugInfoForUDTs( - ArrayRef<std::pair<std::string, TypeIndex>> UDTs) { - for (const std::pair<std::string, codeview::TypeIndex> &UDT : UDTs) { + ArrayRef<std::pair<std::string, const DIType *>> UDTs) { + for (const auto &UDT : UDTs) { + const DIType *T = UDT.second; + assert(shouldEmitUdt(T)); + MCSymbol *UDTRecordBegin = MMI->getContext().createTempSymbol(), *UDTRecordEnd = MMI->getContext().createTempSymbol(); OS.AddComment("Record length"); @@ -2199,7 +2354,7 @@ void CodeViewDebug::emitDebugInfoForUDTs( OS.EmitIntValue(unsigned(SymbolKind::S_UDT), 2); OS.AddComment("Type"); - OS.EmitIntValue(UDT.second.getIndex(), 4); + 
OS.EmitIntValue(getCompleteTypeIndex(T).getIndex(), 4); emitNullTerminatedSymbolName(OS, UDT.first); OS.EmitLabel(UDTRecordEnd); diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/lib/CodeGen/AsmPrinter/CodeViewDebug.h index fd8f60425c24..69e93640d7ef 100644 --- a/lib/CodeGen/AsmPrinter/CodeViewDebug.h +++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.h @@ -23,8 +23,8 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" -#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h" #include "llvm/IR/DebugLoc.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" @@ -52,7 +52,7 @@ class MachineFunction; class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { MCStreamer &OS; BumpPtrAllocator Allocator; - codeview::TypeTableBuilder TypeTable; + codeview::GlobalTypeTableBuilder TypeTable; /// Represents the most general definition range. 
struct LocalVarDefRange { @@ -94,6 +94,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { struct LocalVariable { const DILocalVariable *DIVar = nullptr; SmallVector<LocalVarDefRange, 1> DefRanges; + bool UseReferenceType = false; }; struct InlineSite { @@ -118,6 +119,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { SmallVector<LocalVariable, 1> Locals; + std::vector<std::pair<MCSymbol *, MDNode *>> Annotations; + const MCSymbol *Begin = nullptr; const MCSymbol *End = nullptr; unsigned FuncId = 0; @@ -147,6 +150,9 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { codeview::TypeIndex getFuncIdForSubprogram(const DISubprogram *SP); + void calculateRanges(LocalVariable &Var, + const DbgValueHistoryMap::InstrRanges &Ranges); + static void collectInlineSiteChildren(SmallVectorImpl<unsigned> &Children, const FunctionInfo &FI, const InlineSite &Site); @@ -155,8 +161,9 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { /// emit at the end of the TU. MapVector<const Function *, FunctionInfo> FnDebugInfo; - /// Map from DIFile to .cv_file id. - DenseMap<const DIFile *, unsigned> FileIdMap; + /// Map from full file path to .cv_file id. Full paths are built from DIFiles + /// and are stored in FileToFilepathMap; + DenseMap<StringRef, unsigned> FileIdMap; /// All inlined subprograms in the order they should be emitted. SmallSetVector<const DISubprogram *, 4> InlinedSubprograms; @@ -187,8 +194,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { // The UDTs we have seen while processing types; each entry is a pair of type // index and type name. 
- std::vector<std::pair<std::string, codeview::TypeIndex>> LocalUDTs, - GlobalUDTs; + std::vector<std::pair<std::string, const DIType *>> LocalUDTs; + std::vector<std::pair<std::string, const DIType *>> GlobalUDTs; using FileToFilepathMapTy = std::map<const DIFile *, std::string>; FileToFilepathMapTy FileToFilepathMap; @@ -212,6 +219,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { void emitTypeInformation(); + void emitTypeGlobalHashes(); + void emitCompilerInformation(); void emitInlineeLinesSubsection(); @@ -222,8 +231,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { void emitDebugInfoForRetainedTypes(); - void emitDebugInfoForUDTs( - ArrayRef<std::pair<std::string, codeview::TypeIndex>> UDTs); + void + emitDebugInfoForUDTs(ArrayRef<std::pair<std::string, const DIType *>> UDTs); void emitDebugInfoForGlobal(const DIGlobalVariable *DIGV, const GlobalVariable *GV, MCSymbol *GVSym); @@ -259,6 +268,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { codeview::TypeIndex getTypeIndex(DITypeRef TypeRef, DITypeRef ClassTyRef = DITypeRef()); + codeview::TypeIndex getTypeIndexForReferenceTo(DITypeRef TypeRef); + codeview::TypeIndex getMemberFunctionType(const DISubprogram *SP, const DICompositeType *Class); @@ -266,7 +277,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { codeview::TypeIndex getVBPTypeIndex(); - void addToUDTs(const DIType *Ty, codeview::TypeIndex TI); + void addToUDTs(const DIType *Ty); codeview::TypeIndex lowerType(const DIType *Ty, const DIType *ClassTy); codeview::TypeIndex lowerTypeAlias(const DIDerivedType *Ty); @@ -279,7 +290,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { codeview::TypeIndex lowerTypeVFTableShape(const DIDerivedType *Ty); codeview::TypeIndex lowerTypeMemberFunction(const DISubroutineType *Ty, const DIType *ClassTy, - int ThisAdjustment); + int ThisAdjustment, + bool IsStaticMethod); 
codeview::TypeIndex lowerTypeEnum(const DICompositeType *Ty); codeview::TypeIndex lowerTypeClass(const DICompositeType *Ty); codeview::TypeIndex lowerTypeUnion(const DICompositeType *Ty); diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index 886e6e264b3e..b3148db30cd6 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -777,6 +777,7 @@ void DIEBlock::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const { case dwarf::DW_FORM_block2: Asm->EmitInt16(Size); break; case dwarf::DW_FORM_block4: Asm->EmitInt32(Size); break; case dwarf::DW_FORM_block: Asm->EmitULEB128(Size); break; + case dwarf::DW_FORM_data16: break; } for (const auto &V : values()) @@ -791,6 +792,7 @@ unsigned DIEBlock::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { case dwarf::DW_FORM_block2: return Size + sizeof(int16_t); case dwarf::DW_FORM_block4: return Size + sizeof(int32_t); case dwarf::DW_FORM_block: return Size + getULEB128Size(Size); + case dwarf::DW_FORM_data16: return 16; default: llvm_unreachable("Improper form for block"); } } diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp index c2ad9db81cfd..856758c8e4f6 100644 --- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp +++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp -------------===// +//===- llvm/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp --------------===// // // The LLVM Compiler Infrastructure // @@ -9,17 +9,24 @@ #include "DbgValueHistoryCalculator.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetLowering.h" +#include 
"llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" -#include <algorithm> +#include <cassert> #include <map> +#include <utility> + using namespace llvm; #define DEBUG_TYPE "dwarfdebug" @@ -72,10 +79,12 @@ unsigned DbgValueHistoryMap::getRegisterForVar(InlinedVariable Var) const { } namespace { + // Maps physreg numbers to the variables they describe. -typedef DbgValueHistoryMap::InlinedVariable InlinedVariable; -typedef std::map<unsigned, SmallVector<InlinedVariable, 1>> RegDescribedVarsMap; -} +using InlinedVariable = DbgValueHistoryMap::InlinedVariable; +using RegDescribedVarsMap = std::map<unsigned, SmallVector<InlinedVariable, 1>>; + +} // end anonymous namespace // \brief Claim that @Var is not described by @RegNo anymore. static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo, @@ -83,7 +92,7 @@ static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo, const auto &I = RegVars.find(RegNo); assert(RegNo != 0U && I != RegVars.end()); auto &VarSet = I->second; - const auto &VarPos = find(VarSet, Var); + const auto &VarPos = llvm::find(VarSet, Var); assert(VarPos != VarSet.end()); VarSet.erase(VarPos); // Don't keep empty sets in a map to keep it as small as possible. 
diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h index 16d2d7fd7e99..a7b0562e8102 100644 --- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h +++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h ----*- C++ -*--===// +//===- llvm/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -13,9 +13,11 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/DebugInfoMetadata.h" +#include <utility> namespace llvm { +class DILocalVariable; class MachineFunction; class MachineInstr; class TargetRegisterInfo; @@ -29,11 +31,11 @@ class DbgValueHistoryMap { // instruction of the next instruction range, or until the end of the // function. public: - typedef std::pair<const MachineInstr *, const MachineInstr *> InstrRange; - typedef SmallVector<InstrRange, 4> InstrRanges; - typedef std::pair<const DILocalVariable *, const DILocation *> - InlinedVariable; - typedef MapVector<InlinedVariable, InstrRanges> InstrRangesMap; + using InstrRange = std::pair<const MachineInstr *, const MachineInstr *>; + using InstrRanges = SmallVector<InstrRange, 4>; + using InlinedVariable = + std::pair<const DILocalVariable *, const DILocation *>; + using InstrRangesMap = MapVector<InlinedVariable, InstrRanges>; private: InstrRangesMap VarInstrRanges; @@ -41,6 +43,7 @@ private: public: void startInstrRange(InlinedVariable Var, const MachineInstr &MI); void endInstrRange(InlinedVariable Var, const MachineInstr &MI); + // Returns register currently describing @Var. If @Var is currently // unaccessible or is not described by a register, returns 0. 
unsigned getRegisterForVar(InlinedVariable Var) const; @@ -54,6 +57,7 @@ public: void calculateDbgValueHistory(const MachineFunction *MF, const TargetRegisterInfo *TRI, DbgValueHistoryMap &Result); -} -#endif +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H diff --git a/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp index 0971c5942203..d94b0e5c2118 100644 --- a/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp +++ b/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp @@ -13,16 +13,76 @@ //===----------------------------------------------------------------------===// #include "DebugHandlerBase.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DebugInfo.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; +Optional<DbgVariableLocation> +DbgVariableLocation::extractFromMachineInstruction( + const MachineInstr &Instruction) { + DbgVariableLocation Location; + if (!Instruction.isDebugValue()) + return None; + if (!Instruction.getOperand(0).isReg()) + return None; + Location.Register = Instruction.getOperand(0).getReg(); + Location.FragmentInfo.reset(); + // We only handle expressions generated by DIExpression::appendOffset, + // which doesn't require a full stack machine. 
+ int64_t Offset = 0; + const DIExpression *DIExpr = Instruction.getDebugExpression(); + auto Op = DIExpr->expr_op_begin(); + while (Op != DIExpr->expr_op_end()) { + switch (Op->getOp()) { + case dwarf::DW_OP_constu: { + int Value = Op->getArg(0); + ++Op; + if (Op != DIExpr->expr_op_end()) { + switch (Op->getOp()) { + case dwarf::DW_OP_minus: + Offset -= Value; + break; + case dwarf::DW_OP_plus: + Offset += Value; + break; + default: + continue; + } + } + } break; + case dwarf::DW_OP_plus_uconst: + Offset += Op->getArg(0); + break; + case dwarf::DW_OP_LLVM_fragment: + Location.FragmentInfo = {Op->getArg(1), Op->getArg(0)}; + break; + case dwarf::DW_OP_deref: + Location.LoadChain.push_back(Offset); + Offset = 0; + break; + default: + return None; + } + ++Op; + } + + // Do one final implicit DW_OP_deref if this was an indirect DBG_VALUE + // instruction. + // FIXME: Replace these with DIExpression. + if (Instruction.isIndirectDebugValue()) + Location.LoadChain.push_back(Offset); + + return Location; +} + DebugHandlerBase::DebugHandlerBase(AsmPrinter *A) : Asm(A), MMI(Asm->MMI) {} // Each LexicalScope has first instruction and last instruction to mark @@ -119,7 +179,7 @@ static bool hasDebugInfo(const MachineModuleInfo *MMI, const MachineFunction *MF) { if (!MMI->hasDebugInfo()) return false; - auto *SP = MF->getFunction()->getSubprogram(); + auto *SP = MF->getFunction().getSubprogram(); if (!SP) return false; assert(SP->getUnit()); @@ -163,7 +223,7 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) { // label, so arguments are visible when breaking at function entry. 
const DILocalVariable *DIVar = Ranges.front().first->getDebugVariable(); if (DIVar->isParameter() && - getDISubprogram(DIVar->getScope())->describes(MF->getFunction())) { + getDISubprogram(DIVar->getScope())->describes(&MF->getFunction())) { LabelsBeforeInsn[Ranges.front().first] = Asm->getFunctionBegin(); if (Ranges.front().first->getDebugExpression()->isFragment()) { // Mark all non-overlapping initial fragments. diff --git a/lib/CodeGen/AsmPrinter/DebugHandlerBase.h b/lib/CodeGen/AsmPrinter/DebugHandlerBase.h index 659a921e1fc5..245d70038de9 100644 --- a/lib/CodeGen/AsmPrinter/DebugHandlerBase.h +++ b/lib/CodeGen/AsmPrinter/DebugHandlerBase.h @@ -17,14 +17,38 @@ #include "AsmPrinterHandler.h" #include "DbgValueHistoryCalculator.h" +#include "llvm/ADT/Optional.h" #include "llvm/CodeGen/LexicalScopes.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/IR/DebugInfoMetadata.h" namespace llvm { class AsmPrinter; +class MachineInstr; class MachineModuleInfo; +/// Represents the location at which a variable is stored. +struct DbgVariableLocation { + /// Base register. + unsigned Register; + + /// Chain of offsetted loads necessary to load the value if it lives in + /// memory. Every load except for the last is pointer-sized. + SmallVector<int64_t, 1> LoadChain; + + /// Present if the location is part of a larger variable. + llvm::Optional<llvm::DIExpression::FragmentInfo> FragmentInfo; + + /// Extract a VariableLocation from a MachineInstr. + /// This will only work if Instruction is a debug value instruction + /// and the associated DIExpression is in one of the supported forms. + /// If these requirements are not met, the returned Optional will not + /// have a value. + static Optional<DbgVariableLocation> + extractFromMachineInstruction(const MachineInstr &Instruction); +}; + /// Base class for debug information backends. Common functionality related to /// tracking which variables and scopes are alive at a given PC live here. 
class DebugHandlerBase : public AsmPrinterHandler { diff --git a/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/lib/CodeGen/AsmPrinter/DebugLocEntry.h index a68e8cc6b4b3..3d6d8a76529c 100644 --- a/lib/CodeGen/AsmPrinter/DebugLocEntry.h +++ b/lib/CodeGen/AsmPrinter/DebugLocEntry.h @@ -81,7 +81,7 @@ public: if (isLocation()) { llvm::dbgs() << "Loc = { reg=" << Loc.getReg() << " "; if (Loc.isIndirect()) - llvm::dbgs() << '+' << Loc.getOffset(); + llvm::dbgs() << "+0"; llvm::dbgs() << "} "; } else if (isConstantInt()) diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp index 9c324ea26ac8..c21b3d3451ad 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp @@ -1,4 +1,4 @@ -//=-- llvm/CodeGen/DwarfAccelTable.cpp - Dwarf Accelerator Tables -*- C++ -*-=// +//===- llvm/CodeGen/DwarfAccelTable.cpp - Dwarf Accelerator Tables --------===// // // The LLVM Compiler Infrastructure // @@ -12,16 +12,22 @@ //===----------------------------------------------------------------------===// #include "DwarfAccelTable.h" -#include "DwarfCompileUnit.h" -#include "DwarfDebug.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringMap.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/DIE.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <iterator> +#include <limits> +#include <vector> using namespace llvm; @@ -142,13 +148,13 @@ void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) { unsigned index = 0; for (size_t i = 0, e = Buckets.size(); i < e; ++i) { Asm->OutStreamer->AddComment("Bucket " + Twine(i)); - if (Buckets[i].size() != 0) + if (!Buckets[i].empty()) Asm->EmitInt32(index); else - Asm->EmitInt32(UINT32_MAX); 
+ Asm->EmitInt32(std::numeric_limits<uint32_t>::max()); // Buckets point in the list of hashes, not to the data. Do not // increment the index multiple times in case of hash collisions. - uint64_t PrevHash = UINT64_MAX; + uint64_t PrevHash = std::numeric_limits<uint64_t>::max(); for (auto *HD : Buckets[i]) { uint32_t HashValue = HD->HashValue; if (PrevHash != HashValue) @@ -161,7 +167,7 @@ void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) { // Walk through the buckets and emit the individual hashes for each // bucket. void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) { - uint64_t PrevHash = UINT64_MAX; + uint64_t PrevHash = std::numeric_limits<uint64_t>::max(); for (size_t i = 0, e = Buckets.size(); i < e; ++i) { for (HashList::const_iterator HI = Buckets[i].begin(), HE = Buckets[i].end(); @@ -181,7 +187,7 @@ void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) { // beginning of the section. The non-section symbol will be output later // when we emit the actual data. void DwarfAccelTable::emitOffsets(AsmPrinter *Asm, const MCSymbol *SecBegin) { - uint64_t PrevHash = UINT64_MAX; + uint64_t PrevHash = std::numeric_limits<uint64_t>::max(); for (size_t i = 0, e = Buckets.size(); i < e; ++i) { for (HashList::const_iterator HI = Buckets[i].begin(), HE = Buckets[i].end(); @@ -205,13 +211,14 @@ void DwarfAccelTable::emitOffsets(AsmPrinter *Asm, const MCSymbol *SecBegin) { // Terminate each HashData bucket with 0. void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) { for (size_t i = 0, e = Buckets.size(); i < e; ++i) { - uint64_t PrevHash = UINT64_MAX; + uint64_t PrevHash = std::numeric_limits<uint64_t>::max(); for (HashList::const_iterator HI = Buckets[i].begin(), HE = Buckets[i].end(); HI != HE; ++HI) { // Terminate the previous entry if there is no hash collision // with the current one. 
- if (PrevHash != UINT64_MAX && PrevHash != (*HI)->HashValue) + if (PrevHash != std::numeric_limits<uint64_t>::max() && + PrevHash != (*HI)->HashValue) Asm->EmitInt32(0); // Remember to emit the label for our offset. Asm->OutStreamer->EmitLabel((*HI)->Sym); @@ -257,31 +264,30 @@ void DwarfAccelTable::emit(AsmPrinter *Asm, const MCSymbol *SecBegin, } #ifndef NDEBUG -void DwarfAccelTable::print(raw_ostream &O) { - - Header.print(O); - HeaderData.print(O); +void DwarfAccelTable::print(raw_ostream &OS) { + Header.print(OS); + HeaderData.print(OS); - O << "Entries: \n"; + OS << "Entries: \n"; for (StringMap<DataArray>::const_iterator EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) { - O << "Name: " << EI->getKeyData() << "\n"; + OS << "Name: " << EI->getKeyData() << "\n"; for (HashDataContents *HD : EI->second.Values) - HD->print(O); + HD->print(OS); } - O << "Buckets and Hashes: \n"; + OS << "Buckets and Hashes: \n"; for (size_t i = 0, e = Buckets.size(); i < e; ++i) for (HashList::const_iterator HI = Buckets[i].begin(), HE = Buckets[i].end(); HI != HE; ++HI) - (*HI)->print(O); + (*HI)->print(OS); - O << "Data: \n"; + OS << "Data: \n"; for (std::vector<HashData *>::const_iterator DI = Data.begin(), DE = Data.end(); DI != DE; ++DI) - (*DI)->print(O); + (*DI)->print(OS); } #endif diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h index b1ef8cfe989d..f56199dc8e72 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h @@ -1,4 +1,4 @@ -//==-- llvm/CodeGen/DwarfAccelTable.h - Dwarf Accelerator Tables -*- C++ -*-==// +//==- llvm/CodeGen/DwarfAccelTable.h - Dwarf Accelerator Tables --*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -15,16 +15,19 @@ #define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFACCELTABLE_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/Dwarf.h" 
#include "llvm/CodeGen/DIE.h" +#include "llvm/CodeGen/DwarfStringPoolEntry.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/DataTypes.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" -#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/raw_ostream.h" +#include <cstddef> +#include <cstdint> #include <vector> // The dwarf accelerator tables are an indirect hash table optimized @@ -65,44 +68,35 @@ class AsmPrinter; class DwarfDebug; class DwarfAccelTable { - - static uint32_t HashDJB(StringRef Str) { - uint32_t h = 5381; - for (unsigned i = 0, e = Str.size(); i != e; ++i) - h = ((h << 5) + h) + Str[i]; - return h; - } - // Helper function to compute the number of buckets needed based on // the number of unique hashes. - void ComputeBucketCount(void); + void ComputeBucketCount(); struct TableHeader { - uint32_t magic; // 'HASH' magic value to allow endian detection - uint16_t version; // Version number. - uint16_t hash_function; // The hash function enumeration that was used. - uint32_t bucket_count; // The number of buckets in this hash table. - uint32_t hashes_count; // The total number of unique hash values - // and hash data offsets in this table. - uint32_t header_data_len; // The bytes to skip to get to the hash - // indexes (buckets) for correct alignment. + uint32_t magic = MagicHash; // 'HASH' magic value to allow endian detection + uint16_t version = 1; // Version number. + uint16_t hash_function = dwarf::DW_hash_function_djb; + // The hash function enumeration that was used. + uint32_t bucket_count = 0; // The number of buckets in this hash table. + uint32_t hashes_count = 0; // The total number of unique hash values + // and hash data offsets in this table. + uint32_t header_data_len; // The bytes to skip to get to the hash + // indexes (buckets) for correct alignment. 
// Also written to disk is the implementation specific header data. static const uint32_t MagicHash = 0x48415348; - TableHeader(uint32_t data_len) - : magic(MagicHash), version(1), - hash_function(dwarf::DW_hash_function_djb), bucket_count(0), - hashes_count(0), header_data_len(data_len) {} + TableHeader(uint32_t data_len) : header_data_len(data_len) {} #ifndef NDEBUG - void print(raw_ostream &O) { - O << "Magic: " << format("0x%x", magic) << "\n" - << "Version: " << version << "\n" - << "Hash Function: " << hash_function << "\n" - << "Bucket Count: " << bucket_count << "\n" - << "Header Data Length: " << header_data_len << "\n"; + void print(raw_ostream &OS) { + OS << "Magic: " << format("0x%x", magic) << "\n" + << "Version: " << version << "\n" + << "Hash Function: " << hash_function << "\n" + << "Bucket Count: " << bucket_count << "\n" + << "Header Data Length: " << header_data_len << "\n"; } + void dump() { print(dbgs()); } #endif }; @@ -127,11 +121,13 @@ public: uint16_t form; // DWARF DW_FORM_ defines constexpr Atom(uint16_t type, uint16_t form) : type(type), form(form) {} + #ifndef NDEBUG - void print(raw_ostream &O) { - O << "Type: " << dwarf::AtomTypeString(type) << "\n" - << "Form: " << dwarf::FormEncodingString(form) << "\n"; + void print(raw_ostream &OS) { + OS << "Type: " << dwarf::AtomTypeString(type) << "\n" + << "Form: " << dwarf::FormEncodingString(form) << "\n"; } + void dump() { print(dbgs()); } #endif }; @@ -145,11 +141,12 @@ private: : die_offset_base(offset), Atoms(AtomList.begin(), AtomList.end()) {} #ifndef NDEBUG - void print(raw_ostream &O) { - O << "die_offset_base: " << die_offset_base << "\n"; + void print(raw_ostream &OS) { + OS << "die_offset_base: " << die_offset_base << "\n"; for (size_t i = 0; i < Atoms.size(); i++) - Atoms[i].print(O); + Atoms[i].print(OS); } + void dump() { print(dbgs()); } #endif }; @@ -168,11 +165,12 @@ public: char Flags; // Specific flags to output HashDataContents(const DIE *D, char Flags) : Die(D), 
Flags(Flags) {} + #ifndef NDEBUG - void print(raw_ostream &O) const { - O << " Offset: " << Die->getOffset() << "\n"; - O << " Tag: " << dwarf::TagString(Die->getTag()) << "\n"; - O << " Flags: " << Flags << "\n"; + void print(raw_ostream &OS) const { + OS << " Offset: " << Die->getOffset() << "\n" + << " Tag: " << dwarf::TagString(Die->getTag()) << "\n" + << " Flags: " << Flags << "\n"; } #endif }; @@ -183,39 +181,41 @@ private: DwarfStringPoolEntryRef Name; std::vector<HashDataContents *> Values; }; + friend struct HashData; + struct HashData { StringRef Str; uint32_t HashValue; MCSymbol *Sym; DwarfAccelTable::DataArray &Data; // offsets + HashData(StringRef S, DwarfAccelTable::DataArray &Data) : Str(S), Data(Data) { - HashValue = DwarfAccelTable::HashDJB(S); + HashValue = dwarf::djbHash(S); } + #ifndef NDEBUG - void print(raw_ostream &O) { - O << "Name: " << Str << "\n"; - O << " Hash Value: " << format("0x%x", HashValue) << "\n"; - O << " Symbol: "; + void print(raw_ostream &OS) { + OS << "Name: " << Str << "\n"; + OS << " Hash Value: " << format("0x%x", HashValue) << "\n"; + OS << " Symbol: "; if (Sym) - O << *Sym; + OS << *Sym; else - O << "<none>"; - O << "\n"; + OS << "<none>"; + OS << "\n"; for (HashDataContents *C : Data.Values) { - O << " Offset: " << C->Die->getOffset() << "\n"; - O << " Tag: " << dwarf::TagString(C->Die->getTag()) << "\n"; - O << " Flags: " << C->Flags << "\n"; + OS << " Offset: " << C->Die->getOffset() << "\n"; + OS << " Tag: " << dwarf::TagString(C->Die->getTag()) << "\n"; + OS << " Flags: " << C->Flags << "\n"; } } + void dump() { print(dbgs()); } #endif }; - DwarfAccelTable(const DwarfAccelTable &) = delete; - void operator=(const DwarfAccelTable &) = delete; - // Internal Functions void EmitHeader(AsmPrinter *); void EmitBuckets(AsmPrinter *); @@ -231,25 +231,31 @@ private: TableHeaderData HeaderData; std::vector<HashData *> Data; - typedef StringMap<DataArray, BumpPtrAllocator &> StringEntries; + using StringEntries = 
StringMap<DataArray, BumpPtrAllocator &>; + StringEntries Entries; // Buckets/Hashes/Offsets - typedef std::vector<HashData *> HashList; - typedef std::vector<HashList> BucketList; + using HashList = std::vector<HashData *>; + using BucketList = std::vector<HashList>; BucketList Buckets; HashList Hashes; // Public Implementation public: DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom>); + DwarfAccelTable(const DwarfAccelTable &) = delete; + DwarfAccelTable &operator=(const DwarfAccelTable &) = delete; + void AddName(DwarfStringPoolEntryRef Name, const DIE *Die, char Flags = 0); void FinalizeTable(AsmPrinter *, StringRef); void emit(AsmPrinter *, const MCSymbol *, DwarfDebug *); #ifndef NDEBUG - void print(raw_ostream &O); + void print(raw_ostream &OS); void dump() { print(dbgs()); } #endif }; -} -#endif + +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DWARFACCELTABLE_H diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index dd7f7931b06b..cbb4c48b4d88 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -12,13 +12,12 @@ //===----------------------------------------------------------------------===// #include "DwarfException.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/TargetLoweringObjectFile.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" @@ -31,11 +30,7 @@ #include "llvm/MC/MachineLocation.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" 
-#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; DwarfCFIExceptionBase::DwarfCFIExceptionBase(AsmPrinter *A) @@ -92,7 +87,7 @@ static MCSymbol *getExceptionSym(AsmPrinter *Asm) { void DwarfCFIException::beginFunction(const MachineFunction *MF) { shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false; - const Function *F = MF->getFunction(); + const Function &F = MF->getFunction(); // If any landing pads survive, we need an EH table. bool hasLandingPads = !MF->getLandingPads().empty(); @@ -105,17 +100,17 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) { const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); unsigned PerEncoding = TLOF.getPersonalityEncoding(); const Function *Per = nullptr; - if (F->hasPersonalityFn()) - Per = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts()); + if (F.hasPersonalityFn()) + Per = dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts()); // Emit a personality function even when there are no landing pads forceEmitPersonality = // ...if a personality function is explicitly specified - F->hasPersonalityFn() && + F.hasPersonalityFn() && // ... and it's not known to be a noop in the absence of invokes !isNoOpWithoutInvoke(classifyEHPersonality(Per)) && // ... 
and we're not explicitly asked not to emit it - F->needsUnwindTableEntry(); + F.needsUnwindTableEntry(); shouldEmitPersonality = (forceEmitPersonality || @@ -148,8 +143,8 @@ void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB, if (!shouldEmitPersonality) return; - auto *F = MBB->getParent()->getFunction(); - auto *P = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts()); + auto &F = MBB->getParent()->getFunction(); + auto *P = dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts()); assert(P && "Expected personality function"); // If we are forced to emit this personality, make sure to record diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 333d14a11af5..c8cd8eb8ffd3 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/DwarfCompileUnit.cpp - Dwarf Compile Units -----------===// +//===- llvm/CodeGen/DwarfCompileUnit.cpp - Dwarf Compile Units ------------===// // // The LLVM Compiler Infrastructure // @@ -12,38 +12,58 @@ //===----------------------------------------------------------------------===// #include "DwarfCompileUnit.h" +#include "AddressPool.h" +#include "DwarfDebug.h" #include "DwarfExpression.h" +#include "DwarfUnit.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DIE.h" +#include "llvm/CodeGen/LexicalScopes.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/IR/Constants.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" -#include 
"llvm/IR/GlobalValue.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Instruction.h" -#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/Support/Casting.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Target/TargetOptions.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <iterator> +#include <memory> +#include <string> +#include <utility> -namespace llvm { +using namespace llvm; DwarfCompileUnit::DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU) - : DwarfUnit(dwarf::DW_TAG_compile_unit, Node, A, DW, DWU), UniqueID(UID), - Skeleton(nullptr), BaseAddress(nullptr) { + : DwarfUnit(dwarf::DW_TAG_compile_unit, Node, A, DW, DWU), UniqueID(UID) { insertDIE(Node, &getUnitDie()); MacroLabelBegin = Asm->createTempSymbol("cu_macro_begin"); } /// addLabelAddress - Add a dwarf label attribute data and value using /// DW_FORM_addr or DW_FORM_GNU_addr_index. -/// void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Label) { - // Don't use the address pool in non-fission or in the skeleton unit itself. // FIXME: Once GDB supports this, it's probably worthwhile using the address // pool from the skeleton - maybe even in non-fission (possibly fewer @@ -336,23 +356,15 @@ void DwarfCompileUnit::constructScopeDIE( if (DD->isLexicalScopeDIENull(Scope)) return; - unsigned ChildScopeCount; + bool HasNonScopeChildren = false; // We create children here when we know the scope DIE is not going to be // null and the children will be added to the scope DIE. 
- createScopeChildrenDIE(Scope, Children, &ChildScopeCount); - - // Skip imported directives in gmlt-like data. - if (!includeMinimalInlineScopes()) { - // There is no need to emit empty lexical block DIE. - for (const auto *IE : ImportedEntities[DS]) - Children.push_back( - constructImportedEntityDIE(cast<DIImportedEntity>(IE))); - } + createScopeChildrenDIE(Scope, Children, &HasNonScopeChildren); // If there are only other scopes as children, put them directly in the // parent instead, as this scope would serve no purpose. - if (Children.size() == ChildScopeCount) { + if (!HasNonScopeChildren) { FinalChildren.insert(FinalChildren.end(), std::make_move_iterator(Children.begin()), std::make_move_iterator(Children.end())); @@ -488,14 +500,12 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, if (const MachineInstr *DVInsn = DV.getMInsn()) { assert(DVInsn->getNumOperands() == 4); if (DVInsn->getOperand(0).isReg()) { - const MachineOperand RegOp = DVInsn->getOperand(0); + auto RegOp = DVInsn->getOperand(0); + auto Op1 = DVInsn->getOperand(1); // If the second operand is an immediate, this is an indirect value. - if (DVInsn->getOperand(1).isImm()) { - MachineLocation Location(RegOp.getReg(), - DVInsn->getOperand(1).getImm()); - addVariableAddress(DV, *VariableDie, Location); - } else if (RegOp.getReg()) - addVariableAddress(DV, *VariableDie, MachineLocation(RegOp.getReg())); + assert((!Op1.isImm() || (Op1.getImm() == 0)) && "unexpected offset"); + MachineLocation Location(RegOp.getReg(), Op1.isImm()); + addVariableAddress(DV, *VariableDie, Location); } else if (DVInsn->getOperand(0).isImm()) { // This variable is described by a single constant. // Check whether it has a DIExpression. 
@@ -557,20 +567,27 @@ DIE *DwarfCompileUnit::constructVariableDIE(DbgVariable &DV, DIE *DwarfCompileUnit::createScopeChildrenDIE(LexicalScope *Scope, SmallVectorImpl<DIE *> &Children, - unsigned *ChildScopeCount) { + bool *HasNonScopeChildren) { + assert(Children.empty()); DIE *ObjectPointer = nullptr; for (DbgVariable *DV : DU->getScopeVariables().lookup(Scope)) Children.push_back(constructVariableDIE(*DV, *Scope, ObjectPointer)); - unsigned ChildCountWithoutScopes = Children.size(); + // Skip imported directives in gmlt-like data. + if (!includeMinimalInlineScopes()) { + // There is no need to emit empty lexical block DIE. + for (const auto *IE : ImportedEntities[Scope->getScopeNode()]) + Children.push_back( + constructImportedEntityDIE(cast<DIImportedEntity>(IE))); + } + + if (HasNonScopeChildren) + *HasNonScopeChildren = !Children.empty(); for (LexicalScope *LS : Scope->getChildren()) constructScopeDIE(LS, Children); - if (ChildScopeCount) - *ChildScopeCount = Children.size() - ChildCountWithoutScopes; - return ObjectPointer; } @@ -726,7 +743,7 @@ DbgVariable *DwarfCompileUnit::getExistingAbstractVariable( void DwarfCompileUnit::createAbstractVariable(const DILocalVariable *Var, LexicalScope *Scope) { assert(Scope && Scope->isAbstractScope()); - auto AbsDbgVariable = make_unique<DbgVariable>(Var, /* IA */ nullptr); + auto AbsDbgVariable = llvm::make_unique<DbgVariable>(Var, /* IA */ nullptr); DU->addScopeVariable(Scope, AbsDbgVariable.get()); getAbstractVariables()[Var] = std::move(AbsDbgVariable); } @@ -744,10 +761,19 @@ void DwarfCompileUnit::emitHeader(bool UseOffsets) { DwarfUnit::emitCommonHeader(UseOffsets, UT); } +bool DwarfCompileUnit::hasDwarfPubSections() const { + // Opting in to GNU Pubnames/types overrides the default to ensure these are + // generated for things like Gold's gdb_index generation. 
+ if (CUNode->getGnuPubnames()) + return true; + + return DD->tuneForGDB() && !includeMinimalInlineScopes(); +} + /// addGlobalName - Add a new global name to the compile unit. void DwarfCompileUnit::addGlobalName(StringRef Name, const DIE &Die, const DIScope *Context) { - if (!DD->hasDwarfPubSections(includeMinimalInlineScopes())) + if (!hasDwarfPubSections()) return; std::string FullName = getParentContextString(Context) + Name.str(); GlobalNames[FullName] = &Die; @@ -755,7 +781,7 @@ void DwarfCompileUnit::addGlobalName(StringRef Name, const DIE &Die, void DwarfCompileUnit::addGlobalNameForTypeUnit(StringRef Name, const DIScope *Context) { - if (!DD->hasDwarfPubSections(includeMinimalInlineScopes())) + if (!hasDwarfPubSections()) return; std::string FullName = getParentContextString(Context) + Name.str(); // Insert, allowing the entry to remain as-is if it's already present @@ -768,7 +794,7 @@ void DwarfCompileUnit::addGlobalNameForTypeUnit(StringRef Name, /// Add a new global type to the unit. void DwarfCompileUnit::addGlobalType(const DIType *Ty, const DIE &Die, const DIScope *Context) { - if (!DD->hasDwarfPubSections(includeMinimalInlineScopes())) + if (!hasDwarfPubSections()) return; std::string FullName = getParentContextString(Context) + Ty->getName().str(); GlobalTypes[FullName] = &Die; @@ -776,7 +802,7 @@ void DwarfCompileUnit::addGlobalType(const DIType *Ty, const DIE &Die, void DwarfCompileUnit::addGlobalTypeUnitType(const DIType *Ty, const DIScope *Context) { - if (!DD->hasDwarfPubSections(includeMinimalInlineScopes())) + if (!hasDwarfPubSections()) return; std::string FullName = getParentContextString(Context) + Ty->getName().str(); // Insert, allowing the entry to remain as-is if it's already present @@ -790,6 +816,12 @@ void DwarfCompileUnit::addGlobalTypeUnitType(const DIType *Ty, /// DbgVariable based on provided MachineLocation. 
void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die, MachineLocation Location) { + // addBlockByrefAddress is obsolete and will be removed soon. + // The clang frontend always generates block byref variables with a + // complex expression that encodes exactly what addBlockByrefAddress + // would do. + assert((!DV.isBlockByrefVariable() || DV.hasComplexAddress()) && + "block byref variable without a complex expression"); if (DV.hasComplexAddress()) addComplexAddress(DV, Die, dwarf::DW_AT_location, Location); else if (DV.isBlockByrefVariable()) @@ -806,12 +838,7 @@ void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute, if (Location.isIndirect()) DwarfExpr.setMemoryLocationKind(); - SmallVector<uint64_t, 8> Ops; - if (Location.isIndirect() && Location.getOffset()) { - Ops.push_back(dwarf::DW_OP_plus_uconst); - Ops.push_back(Location.getOffset()); - } - DIExpressionCursor Cursor(Ops); + DIExpressionCursor Cursor({}); const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo(); if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg())) return; @@ -835,13 +862,7 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die, if (Location.isIndirect()) DwarfExpr.setMemoryLocationKind(); - SmallVector<uint64_t, 8> Ops; - if (Location.isIndirect() && Location.getOffset()) { - Ops.push_back(dwarf::DW_OP_plus_uconst); - Ops.push_back(Location.getOffset()); - } - Ops.append(DIExpr->elements_begin(), DIExpr->elements_end()); - DIExpressionCursor Cursor(Ops); + DIExpressionCursor Cursor(DIExpr); const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo(); if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg())) return; @@ -898,4 +919,3 @@ bool DwarfCompileUnit::includeMinimalInlineScopes() const { return getCUNode()->getEmissionKind() == DICompileUnit::LineTablesOnly || (DD->useSplitDwarf() && !Skeleton); } -} // end llvm namespace diff --git 
a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index e38672792867..68482eb7e358 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/DwarfCompileUnit.h - Dwarf Compile Unit ---*- C++ -*--===// +//===- llvm/CodeGen/DwarfCompileUnit.h - Dwarf Compile Unit -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -14,19 +14,32 @@ #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H #define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H +#include "DbgValueHistoryCalculator.h" +#include "DwarfDebug.h" #include "DwarfUnit.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/Dwarf.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/CodeGen/DIE.h" +#include "llvm/CodeGen/LexicalScopes.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/Support/Casting.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <memory> namespace llvm { -class StringRef; class AsmPrinter; -class DIE; -class DwarfDebug; class DwarfFile; +class GlobalVariable; +class MCExpr; class MCSymbol; -class LexicalScope; +class MDNode; class DwarfCompileUnit final : public DwarfUnit { /// A numeric ID unique among all CUs in the module @@ -37,7 +50,7 @@ class DwarfCompileUnit final : public DwarfUnit { DIE::value_iterator StmtListValue; /// Skeleton unit associated with this unit. - DwarfCompileUnit *Skeleton; + DwarfCompileUnit *Skeleton = nullptr; /// The start of the unit within its section. MCSymbol *LabelBegin; @@ -45,9 +58,8 @@ class DwarfCompileUnit final : public DwarfUnit { /// The start of the unit macro info within macro section. 
MCSymbol *MacroLabelBegin; - typedef llvm::SmallVector<const MDNode *, 8> ImportedEntityList; - typedef llvm::DenseMap<const MDNode *, ImportedEntityList> - ImportedEntityMap; + using ImportedEntityList = SmallVector<const MDNode *, 8>; + using ImportedEntityMap = DenseMap<const MDNode *, ImportedEntityList>; ImportedEntityMap ImportedEntities; @@ -66,7 +78,7 @@ class DwarfCompileUnit final : public DwarfUnit { // The base address of this unit, if any. Used for relative references in // ranges/locs. - const MCSymbol *BaseAddress; + const MCSymbol *BaseAddress = nullptr; DenseMap<const MDNode *, DIE *> AbstractSPDies; DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables; @@ -164,6 +176,7 @@ public: void attachRangesOrLowHighPC(DIE &D, const SmallVectorImpl<InsnRange> &Ranges); + /// \brief This scope represents inlined body of a function. Construct /// DIE to represent this concrete inlined copy of the function. DIE *constructInlinedScopeDIE(LexicalScope *Scope); @@ -181,7 +194,7 @@ public: /// A helper function to create children of a Scope DIE. DIE *createScopeChildrenDIE(LexicalScope *Scope, SmallVectorImpl<DIE *> &Children, - unsigned *ChildScopeCount = nullptr); + bool *HasNonScopeChildren = nullptr); /// \brief Construct a DIE for this subprogram scope. void constructSubprogramScopeDIE(const DISubprogram *Sub, LexicalScope *Scope); @@ -195,8 +208,9 @@ public: void finishSubprogramDefinition(const DISubprogram *SP); void finishVariableDefinition(const DbgVariable &Var); + /// Find abstract variable associated with Var. 
- typedef DbgValueHistoryMap::InlinedVariable InlinedVariable; + using InlinedVariable = DbgValueHistoryMap::InlinedVariable; DbgVariable *getExistingAbstractVariable(InlinedVariable IV, const DILocalVariable *&Cleansed); DbgVariable *getExistingAbstractVariable(InlinedVariable IV); @@ -275,8 +289,10 @@ public: void setBaseAddress(const MCSymbol *Base) { BaseAddress = Base; } const MCSymbol *getBaseAddress() const { return BaseAddress; } + + bool hasDwarfPubSections() const; }; -} // end llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index f1b4d9f20ca9..2c9c7d4f3146 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/DwarfDebug.cpp - Dwarf Debug Framework ---------------===// +//===- llvm/CodeGen/DwarfDebug.cpp - Dwarf Debug Framework ----------------===// // // The LLVM Compiler Infrastructure // @@ -15,43 +15,67 @@ #include "ByteStreamer.h" #include "DIEHash.h" #include "DebugLocEntry.h" +#include "DebugLocStream.h" +#include "DwarfAccelTable.h" #include "DwarfCompileUnit.h" #include "DwarfExpression.h" +#include "DwarfFile.h" #include "DwarfUnit.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/DIE.h" +#include "llvm/CodeGen/LexicalScopes.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include 
"llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" -#include "llvm/IR/Instructions.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Module.h" -#include "llvm/IR/ValueHandle.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/MC/SectionKind.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FormattedStream.h" -#include "llvm/Support/LEB128.h" #include "llvm/Support/MD5.h" -#include "llvm/Support/Path.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <iterator> +#include <string> +#include <utility> +#include <vector> using namespace llvm; @@ -61,10 +85,9 @@ static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden, cl::desc("Disable debug info printing")); -static cl::opt<bool> -GenerateGnuPubSections("generate-gnu-dwarf-pub-sections", cl::Hidden, - cl::desc("Generate GNU-style pubnames and pubtypes"), - cl::init(false)); +static 
cl::opt<bool> UseDwarfRangesBaseAddressSpecifier( + "use-dwarf-ranges-base-address-specifier", cl::Hidden, + cl::desc("Use base address specifiers in debug_ranges"), cl::init(false)); static cl::opt<bool> GenerateARangeSection("generate-arange-section", cl::Hidden, @@ -75,9 +98,7 @@ static cl::opt<bool> SplitDwarfCrossCuReferences( "split-dwarf-cross-cu-references", cl::Hidden, cl::desc("Enable cross-cu references in DWO files"), cl::init(false)); -namespace { enum DefaultOnOff { Default, Enable, Disable }; -} static cl::opt<DefaultOnOff> UnknownLocations( "use-unknown-locations", cl::Hidden, @@ -94,19 +115,12 @@ DwarfAccelTables("dwarf-accel-tables", cl::Hidden, clEnumVal(Disable, "Disabled")), cl::init(Default)); -static cl::opt<DefaultOnOff> -DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden, - cl::desc("Generate DWARF pubnames and pubtypes sections"), - cl::values(clEnumVal(Default, "Default for platform"), - clEnumVal(Enable, "Enabled"), - clEnumVal(Disable, "Disabled")), - cl::init(Default)); - enum LinkageNameOption { DefaultLinkageNames, AllLinkageNames, AbstractLinkageNames }; + static cl::opt<LinkageNameOption> DwarfLinkageNames("dwarf-linkage-names", cl::Hidden, cl::desc("Which DWARF linkage-name attributes to emit."), @@ -142,8 +156,6 @@ bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI, return false; } -//===----------------------------------------------------------------------===// - bool DbgVariable::isBlockByrefVariable() const { assert(Var && "Invalid complex DbgVariable!"); return Var->getType().resolve()->isBlockByrefStruct(); @@ -198,17 +210,54 @@ ArrayRef<DbgVariable::FrameIndexExpr> DbgVariable::getFrameIndexExprs() const { if (FrameIndexExprs.size() == 1) return FrameIndexExprs; - assert(all_of(FrameIndexExprs, - [](const FrameIndexExpr &A) { return A.Expr->isFragment(); }) && + assert(llvm::all_of(FrameIndexExprs, + [](const FrameIndexExpr &A) { + return A.Expr->isFragment(); + }) && "multiple FI 
expressions without DW_OP_LLVM_fragment"); std::sort(FrameIndexExprs.begin(), FrameIndexExprs.end(), [](const FrameIndexExpr &A, const FrameIndexExpr &B) -> bool { return A.Expr->getFragmentInfo()->OffsetInBits < B.Expr->getFragmentInfo()->OffsetInBits; }); + return FrameIndexExprs; } +void DbgVariable::addMMIEntry(const DbgVariable &V) { + assert(DebugLocListIndex == ~0U && !MInsn && "not an MMI entry"); + assert(V.DebugLocListIndex == ~0U && !V.MInsn && "not an MMI entry"); + assert(V.Var == Var && "conflicting variable"); + assert(V.IA == IA && "conflicting inlined-at location"); + + assert(!FrameIndexExprs.empty() && "Expected an MMI entry"); + assert(!V.FrameIndexExprs.empty() && "Expected an MMI entry"); + + // FIXME: This logic should not be necessary anymore, as we now have proper + // deduplication. However, without it, we currently run into the assertion + // below, which means that we are likely dealing with broken input, i.e. two + // non-fragment entries for the same variable at different frame indices. + if (FrameIndexExprs.size()) { + auto *Expr = FrameIndexExprs.back().Expr; + if (!Expr || !Expr->isFragment()) + return; + } + + for (const auto &FIE : V.FrameIndexExprs) + // Ignore duplicate entries. 
+ if (llvm::none_of(FrameIndexExprs, [&](const FrameIndexExpr &Other) { + return FIE.FI == Other.FI && FIE.Expr == Other.Expr; + })) + FrameIndexExprs.push_back(FIE); + + assert((FrameIndexExprs.size() == 1 || + llvm::all_of(FrameIndexExprs, + [](FrameIndexExpr &FIE) { + return FIE.Expr && FIE.Expr->isFragment(); + })) && + "conflicting locations for variable"); +} + static const DwarfAccelTable::Atom TypeAtoms[] = { DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4), DwarfAccelTable::Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2), @@ -225,9 +274,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) dwarf::DW_FORM_data4)), AccelNamespace(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)), - AccelTypes(TypeAtoms), DebuggerTuning(DebuggerKind::Default) { - - CurFn = nullptr; + AccelTypes(TypeAtoms) { const Triple &TT = Asm->TM.getTargetTriple(); // Make sure we know our "debugger tuning." The target option takes @@ -278,7 +325,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) } // Define out of line so we don't have to include DwarfUnit.h in DwarfDebug.h. -DwarfDebug::~DwarfDebug() { } +DwarfDebug::~DwarfDebug() = default; static bool isObjCClass(StringRef Name) { return Name.startswith("+") || Name.startswith("-"); @@ -389,20 +436,8 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, } } -bool DwarfDebug::hasDwarfPubSections(bool includeMinimalInlineScopes) const { - // Opting in to GNU Pubnames/types overrides the default to ensure these are - // generated for things like Gold's gdb_index generation. 
- if (GenerateGnuPubSections) - return true; - - if (DwarfPubSections == Default) - return tuneForGDB() && !includeMinimalInlineScopes; - - return DwarfPubSections == Enable; -} - void DwarfDebug::addGnuPubAttributes(DwarfCompileUnit &U, DIE &D) const { - if (!hasDwarfPubSections(U.includeMinimalInlineScopes())) + if (!U.hasDwarfPubSections()) return; U.addFlag(D, dwarf::DW_AT_GNU_pubnames); @@ -417,7 +452,7 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) { StringRef FN = DIUnit->getFilename(); CompilationDir = DIUnit->getDirectory(); - auto OwnedUnit = make_unique<DwarfCompileUnit>( + auto OwnedUnit = llvm::make_unique<DwarfCompileUnit>( InfoHolder.getUnits().size(), DIUnit, Asm, this, &InfoHolder); DwarfCompileUnit &NewCU = *OwnedUnit; DIE &Die = NewCU.getUnitDie(); @@ -428,6 +463,9 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) { Asm->TM.Options.MCOptions.SplitDwarfFile); } + for (auto *IE : DIUnit->getImportedEntities()) + NewCU.addImportedEntity(IE); + // LTO with assembly output shares a single line table amongst multiple CUs. 
// To avoid the compilation directory being ambiguous, let the line table // explicitly describe the directory of all files, never relying on the @@ -494,6 +532,8 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) { void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU, const DIImportedEntity *N) { + if (isa<DILocalScope>(N->getScope())) + return; if (DIE *D = TheCU.getOrCreateContextDIE(N->getScope())) D->addChild(TheCU.constructImportedEntityDIE(N)); } @@ -503,13 +543,18 @@ static SmallVectorImpl<DwarfCompileUnit::GlobalExpr> & sortGlobalExprs(SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &GVEs) { std::sort(GVEs.begin(), GVEs.end(), [](DwarfCompileUnit::GlobalExpr A, DwarfCompileUnit::GlobalExpr B) { - if (A.Expr != B.Expr && A.Expr && B.Expr) { - auto FragmentA = A.Expr->getFragmentInfo(); - auto FragmentB = B.Expr->getFragmentInfo(); - if (FragmentA && FragmentB) - return FragmentA->OffsetInBits < FragmentB->OffsetInBits; - } - return false; + // Sort order: first null exprs, then exprs without fragment + // info, then sort by fragment offset in bits. + // FIXME: Come up with a more comprehensive comparator so + // the sorting isn't non-deterministic, and so the following + // std::unique call works correctly. 
+ if (!A.Expr || !B.Expr) + return !!B.Expr; + auto FragmentA = A.Expr->getFragmentInfo(); + auto FragmentB = B.Expr->getFragmentInfo(); + if (!FragmentA || !FragmentB) + return !!FragmentB; + return FragmentA->OffsetInBits < FragmentB->OffsetInBits; }); GVEs.erase(std::unique(GVEs.begin(), GVEs.end(), [](DwarfCompileUnit::GlobalExpr A, @@ -546,18 +591,31 @@ void DwarfDebug::beginModule() { } for (DICompileUnit *CUNode : M->debug_compile_units()) { - if (CUNode->getEnumTypes().empty() && CUNode->getRetainedTypes().empty() && - CUNode->getGlobalVariables().empty() && - CUNode->getImportedEntities().empty() && CUNode->getMacros().empty()) + // FIXME: Move local imported entities into a list attached to the + // subprogram, then this search won't be needed and a + // getImportedEntities().empty() test should go below with the rest. + bool HasNonLocalImportedEntities = llvm::any_of( + CUNode->getImportedEntities(), [](const DIImportedEntity *IE) { + return !isa<DILocalScope>(IE->getScope()); + }); + + if (!HasNonLocalImportedEntities && CUNode->getEnumTypes().empty() && + CUNode->getRetainedTypes().empty() && + CUNode->getGlobalVariables().empty() && CUNode->getMacros().empty()) continue; DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(CUNode); - for (auto *IE : CUNode->getImportedEntities()) - CU.addImportedEntity(IE); // Global Variables. - for (auto *GVE : CUNode->getGlobalVariables()) - GVMap[GVE->getVariable()].push_back({nullptr, GVE->getExpression()}); + for (auto *GVE : CUNode->getGlobalVariables()) { + // Don't bother adding DIGlobalVariableExpressions listed in the CU if we + // already know about the variable and it isn't adding a constant + // expression. 
+ auto &GVMapEntry = GVMap[GVE->getVariable()]; + auto *Expr = GVE->getExpression(); + if (!GVMapEntry.size() || (Expr && Expr->isConstant())) + GVMapEntry.push_back({nullptr, Expr}); + } DenseSet<DIGlobalVariable *> Processed; for (auto *GVE : CUNode->getGlobalVariables()) { DIGlobalVariable *GV = GVE->getVariable(); @@ -682,6 +740,11 @@ void DwarfDebug::finalizeModuleInfo() { TLOF.getDwarfMacinfoSection()->getBeginSymbol()); } + // Emit all frontend-produced Skeleton CUs, i.e., Clang modules. + for (auto *CUNode : MMI->getModule()->debug_compile_units()) + if (CUNode->getDWOId()) + getOrCreateDwarfCompileUnit(CUNode); + // Compute DIE offsets and sizes. InfoHolder.computeSizeAndOffsets(); if (useSplitDwarf()) @@ -744,12 +807,7 @@ void DwarfDebug::endModule() { } // Emit the pubnames and pubtypes sections if requested. - // The condition is optimistically correct - any CU not using GMLT (& - // implicit/default pubnames state) might still have pubnames. - if (hasDwarfPubSections(/* gmlt */ false)) { - emitDebugPubNames(GenerateGnuPubSections); - emitDebugPubTypes(GenerateGnuPubSections); - } + emitDebugPubSections(); // clean up. // FIXME: AbstractVariables.clear(); @@ -775,9 +833,11 @@ void DwarfDebug::ensureAbstractVariableIsCreatedIfScoped(DwarfCompileUnit &CU, LScopes.findAbstractScope(cast_or_null<DILocalScope>(ScopeNode))) CU.createAbstractVariable(Cleansed, Scope); } + // Collect variable information from side table maintained by MF. 
void DwarfDebug::collectVariableInfoFromMFTable( DwarfCompileUnit &TheCU, DenseSet<InlinedVariable> &Processed) { + SmallDenseMap<InlinedVariable, DbgVariable *> MFVars; for (const auto &VI : Asm->MF->getVariableDbgInfo()) { if (!VI.Var) continue; @@ -793,26 +853,28 @@ void DwarfDebug::collectVariableInfoFromMFTable( continue; ensureAbstractVariableIsCreatedIfScoped(TheCU, Var, Scope->getScopeNode()); - auto RegVar = make_unique<DbgVariable>(Var.first, Var.second); + auto RegVar = llvm::make_unique<DbgVariable>(Var.first, Var.second); RegVar->initializeMMI(VI.Expr, VI.Slot); - if (InfoHolder.addScopeVariable(Scope, RegVar.get())) + if (DbgVariable *DbgVar = MFVars.lookup(Var)) + DbgVar->addMMIEntry(*RegVar); + else if (InfoHolder.addScopeVariable(Scope, RegVar.get())) { + MFVars.insert({Var, RegVar.get()}); ConcreteVariables.push_back(std::move(RegVar)); + } } } // Get .debug_loc entry for the instruction range starting at MI. static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) { const DIExpression *Expr = MI->getDebugExpression(); - assert(MI->getNumOperands() == 4); if (MI->getOperand(0).isReg()) { - MachineLocation MLoc; + auto RegOp = MI->getOperand(0); + auto Op1 = MI->getOperand(1); // If the second operand is an immediate, this is a // register-indirect address. 
- if (!MI->getOperand(1).isImm()) - MLoc.set(MI->getOperand(0).getReg()); - else - MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm()); + assert((!Op1.isImm() || (Op1.getImm() == 0)) && "unexpected offset"); + MachineLocation MLoc(RegOp.getReg(), Op1.isImm()); return DebugLocEntry::Value(Expr, MLoc); } if (MI->getOperand(0).isImm()) @@ -967,7 +1029,8 @@ DbgVariable *DwarfDebug::createConcreteVariable(DwarfCompileUnit &TheCU, LexicalScope &Scope, InlinedVariable IV) { ensureAbstractVariableIsCreatedIfScoped(TheCU, IV, Scope.getScopeNode()); - ConcreteVariables.push_back(make_unique<DbgVariable>(IV.first, IV.second)); + ConcreteVariables.push_back( + llvm::make_unique<DbgVariable>(IV.first, IV.second)); InfoHolder.addScopeVariable(&Scope, ConcreteVariables.back().get()); return ConcreteVariables.back().get(); } @@ -1100,7 +1163,7 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { DebugHandlerBase::beginInstruction(MI); assert(CurMI); - const auto *SP = MI->getParent()->getParent()->getFunction()->getSubprogram(); + const auto *SP = MI->getMF()->getFunction().getSubprogram(); if (!SP || SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug) return; @@ -1198,7 +1261,7 @@ static DebugLoc findPrologueEndLoc(const MachineFunction *MF) { void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) { CurFn = MF; - auto *SP = MF->getFunction()->getSubprogram(); + auto *SP = MF->getFunction().getSubprogram(); assert(LScopes.empty() || SP == LScopes.getCurrentFunctionScope()->getScopeNode()); if (SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug) return; @@ -1234,7 +1297,7 @@ void DwarfDebug::skippedNonDebugFunction() { // Gather and emit post-function debug information. 
void DwarfDebug::endFunctionImpl(const MachineFunction *MF) { - const DISubprogram *SP = MF->getFunction()->getSubprogram(); + const DISubprogram *SP = MF->getFunction().getSubprogram(); assert(CurFn == MF && "endFunction should be called with the same function as beginFunction"); @@ -1309,8 +1372,8 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S, if (auto *Scope = cast_or_null<DIScope>(S)) { Fn = Scope->getFilename(); Dir = Scope->getDirectory(); - if (auto *LBF = dyn_cast<DILexicalBlockFile>(Scope)) - if (getDwarfVersion() >= 4) + if (Line != 0 && getDwarfVersion() >= 4) + if (auto *LBF = dyn_cast<DILexicalBlockFile>(Scope)) Discriminator = LBF->getDiscriminator(); unsigned CUID = Asm->OutStreamer->getContext().getDwarfCompileUnitID(); @@ -1440,84 +1503,74 @@ static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU, } } -/// emitDebugPubNames - Emit visible names into a debug pubnames section. -/// -void DwarfDebug::emitDebugPubNames(bool GnuStyle) { - MCSection *PSec = GnuStyle - ? Asm->getObjFileLowering().getDwarfGnuPubNamesSection() - : Asm->getObjFileLowering().getDwarfPubNamesSection(); - - emitDebugPubSection(GnuStyle, PSec, "Names", - &DwarfCompileUnit::getGlobalNames); -} - -void DwarfDebug::emitDebugPubSection( - bool GnuStyle, MCSection *PSec, StringRef Name, - const StringMap<const DIE *> &(DwarfCompileUnit::*Accessor)() const) { +/// emitDebugPubSections - Emit visible names and types into debug pubnames and +/// pubtypes sections. +void DwarfDebug::emitDebugPubSections() { for (const auto &NU : CUMap) { DwarfCompileUnit *TheU = NU.second; - - const auto &Globals = (TheU->*Accessor)(); - - if (!hasDwarfPubSections(TheU->includeMinimalInlineScopes())) + if (!TheU->hasDwarfPubSections()) continue; - if (auto *Skeleton = TheU->getSkeleton()) - TheU = Skeleton; + bool GnuStyle = TheU->getCUNode()->getGnuPubnames(); - // Start the dwarf pubnames section. 
- Asm->OutStreamer->SwitchSection(PSec); + Asm->OutStreamer->SwitchSection( + GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubNamesSection() + : Asm->getObjFileLowering().getDwarfPubNamesSection()); + emitDebugPubSection(GnuStyle, "Names", TheU, TheU->getGlobalNames()); - // Emit the header. - Asm->OutStreamer->AddComment("Length of Public " + Name + " Info"); - MCSymbol *BeginLabel = Asm->createTempSymbol("pub" + Name + "_begin"); - MCSymbol *EndLabel = Asm->createTempSymbol("pub" + Name + "_end"); - Asm->EmitLabelDifference(EndLabel, BeginLabel, 4); + Asm->OutStreamer->SwitchSection( + GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubTypesSection() + : Asm->getObjFileLowering().getDwarfPubTypesSection()); + emitDebugPubSection(GnuStyle, "Types", TheU, TheU->getGlobalTypes()); + } +} - Asm->OutStreamer->EmitLabel(BeginLabel); +void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name, + DwarfCompileUnit *TheU, + const StringMap<const DIE *> &Globals) { + if (auto *Skeleton = TheU->getSkeleton()) + TheU = Skeleton; - Asm->OutStreamer->AddComment("DWARF Version"); - Asm->EmitInt16(dwarf::DW_PUBNAMES_VERSION); + // Emit the header. + Asm->OutStreamer->AddComment("Length of Public " + Name + " Info"); + MCSymbol *BeginLabel = Asm->createTempSymbol("pub" + Name + "_begin"); + MCSymbol *EndLabel = Asm->createTempSymbol("pub" + Name + "_end"); + Asm->EmitLabelDifference(EndLabel, BeginLabel, 4); - Asm->OutStreamer->AddComment("Offset of Compilation Unit Info"); - Asm->emitDwarfSymbolReference(TheU->getLabelBegin()); + Asm->OutStreamer->EmitLabel(BeginLabel); - Asm->OutStreamer->AddComment("Compilation Unit Length"); - Asm->EmitInt32(TheU->getLength()); + Asm->OutStreamer->AddComment("DWARF Version"); + Asm->EmitInt16(dwarf::DW_PUBNAMES_VERSION); - // Emit the pubnames for this compilation unit. 
- for (const auto &GI : Globals) { - const char *Name = GI.getKeyData(); - const DIE *Entity = GI.second; + Asm->OutStreamer->AddComment("Offset of Compilation Unit Info"); + Asm->emitDwarfSymbolReference(TheU->getLabelBegin()); - Asm->OutStreamer->AddComment("DIE offset"); - Asm->EmitInt32(Entity->getOffset()); + Asm->OutStreamer->AddComment("Compilation Unit Length"); + Asm->EmitInt32(TheU->getLength()); - if (GnuStyle) { - dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheU, Entity); - Asm->OutStreamer->AddComment( - Twine("Kind: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + ", " + - dwarf::GDBIndexEntryLinkageString(Desc.Linkage)); - Asm->EmitInt8(Desc.toBits()); - } + // Emit the pubnames for this compilation unit. + for (const auto &GI : Globals) { + const char *Name = GI.getKeyData(); + const DIE *Entity = GI.second; + + Asm->OutStreamer->AddComment("DIE offset"); + Asm->EmitInt32(Entity->getOffset()); - Asm->OutStreamer->AddComment("External Name"); - Asm->OutStreamer->EmitBytes(StringRef(Name, GI.getKeyLength() + 1)); + if (GnuStyle) { + dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheU, Entity); + Asm->OutStreamer->AddComment( + Twine("Kind: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + ", " + + dwarf::GDBIndexEntryLinkageString(Desc.Linkage)); + Asm->EmitInt8(Desc.toBits()); } - Asm->OutStreamer->AddComment("End Mark"); - Asm->EmitInt32(0); - Asm->OutStreamer->EmitLabel(EndLabel); + Asm->OutStreamer->AddComment("External Name"); + Asm->OutStreamer->EmitBytes(StringRef(Name, GI.getKeyLength() + 1)); } -} - -void DwarfDebug::emitDebugPubTypes(bool GnuStyle) { - MCSection *PSec = GnuStyle - ? 
Asm->getObjFileLowering().getDwarfGnuPubTypesSection() - : Asm->getObjFileLowering().getDwarfPubTypesSection(); - emitDebugPubSection(GnuStyle, PSec, "Types", - &DwarfCompileUnit::getGlobalTypes); + Asm->OutStreamer->AddComment("End Mark"); + Asm->EmitInt32(0); + Asm->OutStreamer->EmitLabel(EndLabel); } /// Emit null-terminated strings into a debug str section. @@ -1553,13 +1606,7 @@ static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, MachineLocation Location = Value.getLoc(); if (Location.isIndirect()) DwarfExpr.setMemoryLocationKind(); - SmallVector<uint64_t, 8> Ops; - if (Location.isIndirect() && Location.getOffset()) { - Ops.push_back(dwarf::DW_OP_plus_uconst); - Ops.push_back(Location.getOffset()); - } - Ops.append(DIExpr->elements_begin(), DIExpr->elements_end()); - DIExpressionCursor Cursor(Ops); + DIExpressionCursor Cursor(DIExpr); const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo(); if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg())) return; @@ -1580,7 +1627,7 @@ void DebugLocEntry::finalize(const AsmPrinter &AP, const DebugLocEntry::Value &Value = Values[0]; if (Value.isFragment()) { // Emit all fragments that belong to the same variable and range. - assert(all_of(Values, [](DebugLocEntry::Value P) { + assert(llvm::all_of(Values, [](DebugLocEntry::Value P) { return P.isFragment(); }) && "all values are expected to be fragments"); assert(std::is_sorted(Values.begin(), Values.end()) && @@ -1844,17 +1891,49 @@ void DwarfDebug::emitDebugRanges() { // Emit our symbol so we can find the beginning of the range. Asm->OutStreamer->EmitLabel(List.getSym()); + // Gather all the ranges that apply to the same section so they can share + // a base address entry. 
+ MapVector<const MCSection *, std::vector<const RangeSpan *>> MV; for (const RangeSpan &Range : List.getRanges()) { - const MCSymbol *Begin = Range.getStart(); - const MCSymbol *End = Range.getEnd(); - assert(Begin && "Range without a begin symbol?"); - assert(End && "Range without an end symbol?"); - if (auto *Base = TheCU->getBaseAddress()) { - Asm->EmitLabelDifference(Begin, Base, Size); - Asm->EmitLabelDifference(End, Base, Size); - } else { - Asm->OutStreamer->EmitSymbolValue(Begin, Size); - Asm->OutStreamer->EmitSymbolValue(End, Size); + MV[&Range.getStart()->getSection()].push_back(&Range); + } + + auto *CUBase = TheCU->getBaseAddress(); + bool BaseIsSet = false; + for (const auto &P : MV) { + // Don't bother with a base address entry if there's only one range in + // this section in this range list - for example ranges for a CU will + // usually consist of single regions from each of many sections + // (-ffunction-sections, or just C++ inline functions) except under LTO + // or optnone where there may be holes in a single CU's section + // contrubutions. + auto *Base = CUBase; + if (!Base && P.second.size() > 1 && + UseDwarfRangesBaseAddressSpecifier) { + BaseIsSet = true; + // FIXME/use care: This may not be a useful base address if it's not + // the lowest address/range in this object. 
+ Base = P.second.front()->getStart(); + Asm->OutStreamer->EmitIntValue(-1, Size); + Asm->OutStreamer->EmitSymbolValue(Base, Size); + } else if (BaseIsSet) { + BaseIsSet = false; + Asm->OutStreamer->EmitIntValue(-1, Size); + Asm->OutStreamer->EmitIntValue(0, Size); + } + + for (const auto *RS : P.second) { + const MCSymbol *Begin = RS->getStart(); + const MCSymbol *End = RS->getEnd(); + assert(Begin && "Range without a begin symbol?"); + assert(End && "Range without an end symbol?"); + if (Base) { + Asm->EmitLabelDifference(Begin, Base, Size); + Asm->EmitLabelDifference(End, Base, Size); + } else { + Asm->OutStreamer->EmitSymbolValue(Begin, Size); + Asm->OutStreamer->EmitSymbolValue(End, Size); + } } } @@ -1943,7 +2022,7 @@ void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die, // DW_AT_addr_base, DW_AT_ranges_base. DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) { - auto OwnedUnit = make_unique<DwarfCompileUnit>( + auto OwnedUnit = llvm::make_unique<DwarfCompileUnit>( CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder); DwarfCompileUnit &NewCU = *OwnedUnit; NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection()); @@ -2024,8 +2103,8 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, bool TopLevelType = TypeUnitsUnderConstruction.empty(); AddrPool.resetUsedFlag(); - auto OwnedUnit = make_unique<DwarfTypeUnit>(CU, Asm, this, &InfoHolder, - getDwoLineTable(CU)); + auto OwnedUnit = llvm::make_unique<DwarfTypeUnit>(CU, Asm, this, &InfoHolder, + getDwoLineTable(CU)); DwarfTypeUnit &NewTU = *OwnedUnit; DIE &UnitDie = NewTU.getUnitDie(); TypeUnitsUnderConstruction.emplace_back(std::move(OwnedUnit), CTy); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 78ee9a162029..2ae0b418a91e 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/DwarfDebug.h - Dwarf Debug Framework ------*- C++ 
-*--===// +//===- llvm/CodeGen/DwarfDebug.h - Dwarf Debug Framework --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -14,40 +14,52 @@ #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H #define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H +#include "AddressPool.h" #include "DbgValueHistoryCalculator.h" #include "DebugHandlerBase.h" #include "DebugLocStream.h" #include "DwarfAccelTable.h" #include "DwarfFile.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" -#include "llvm/CodeGen/DIE.h" -#include "llvm/CodeGen/LexicalScopes.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Metadata.h" #include "llvm/MC/MCDwarf.h" -#include "llvm/MC/MachineLocation.h" #include "llvm/Support/Allocator.h" #include "llvm/Target/TargetOptions.h" +#include <cassert> +#include <cstdint> +#include <limits> #include <memory> +#include <utility> +#include <vector> namespace llvm { class AsmPrinter; class ByteStreamer; -class ConstantInt; -class ConstantFP; class DebugLocEntry; +class DIE; class DwarfCompileUnit; -class DwarfDebug; class DwarfTypeUnit; class DwarfUnit; -class MachineModuleInfo; +class LexicalScope; +class MachineFunction; +class MCSection; +class MCSymbol; +class MDNode; +class Module; //===----------------------------------------------------------------------===// /// This class is used to track local variable information. 
@@ -89,7 +101,7 @@ public: assert(!MInsn && "Already initialized?"); assert((!E || E->isValid()) && "Expected valid expression"); - assert(FI != INT_MAX && "Expected valid index"); + assert(FI != std::numeric_limits<int>::max() && "Expected valid index"); FrameIndexExprs.push_back({FI, E}); } @@ -111,10 +123,12 @@ public: // Accessors. const DILocalVariable *getVariable() const { return Var; } const DILocation *getInlinedAt() const { return IA; } + const DIExpression *getSingleExpression() const { assert(MInsn && FrameIndexExprs.size() <= 1); return FrameIndexExprs.size() ? FrameIndexExprs[0].Expr : nullptr; } + void setDIE(DIE &D) { TheDIE = &D; } DIE *getDIE() const { return TheDIE; } void setDebugLocListIndex(unsigned O) { DebugLocListIndex = O; } @@ -124,30 +138,7 @@ public: /// Get the FI entries, sorted by fragment offset. ArrayRef<FrameIndexExpr> getFrameIndexExprs() const; bool hasFrameIndexExprs() const { return !FrameIndexExprs.empty(); } - - void addMMIEntry(const DbgVariable &V) { - assert(DebugLocListIndex == ~0U && !MInsn && "not an MMI entry"); - assert(V.DebugLocListIndex == ~0U && !V.MInsn && "not an MMI entry"); - assert(V.Var == Var && "conflicting variable"); - assert(V.IA == IA && "conflicting inlined-at location"); - - assert(!FrameIndexExprs.empty() && "Expected an MMI entry"); - assert(!V.FrameIndexExprs.empty() && "Expected an MMI entry"); - - if (FrameIndexExprs.size()) { - auto *Expr = FrameIndexExprs.back().Expr; - // Get rid of duplicate non-fragment entries. More than one non-fragment - // dbg.declare makes no sense so ignore all but the first. - if (!Expr || !Expr->isFragment()) - return; - } - FrameIndexExprs.append(V.FrameIndexExprs.begin(), V.FrameIndexExprs.end()); - assert(all_of(FrameIndexExprs, - [](FrameIndexExpr &FIE) { - return FIE.Expr && FIE.Expr->isFragment(); - }) && - "conflicting locations for variable"); - } + void addMMIEntry(const DbgVariable &V); // Translate tag to proper Dwarf tag. 
dwarf::Tag getTag() const { @@ -157,6 +148,7 @@ public: return dwarf::DW_TAG_variable; } + /// Return true if DbgVariable is artificial. bool isArtificial() const { if (Var->isArtificial()) @@ -182,6 +174,7 @@ public: "Invalid Expr for DBG_VALUE"); return !FrameIndexExprs.empty(); } + bool isBlockByrefVariable() const; const DIType *getType() const; @@ -191,10 +184,10 @@ private: } }; - /// Helper used to pair up a symbol and its DWARF compile unit. struct SymbolCU { SymbolCU(DwarfCompileUnit *CU, const MCSymbol *Sym) : Sym(Sym), CU(CU) {} + const MCSymbol *Sym; DwarfCompileUnit *CU; }; @@ -230,7 +223,7 @@ class DwarfDebug : public DebugHandlerBase { ProcessedSPNodes; /// If nonnull, stores the current machine function we're processing. - const MachineFunction *CurFn; + const MachineFunction *CurFn = nullptr; /// If nonnull, stores the CU in which the previous subprogram was contained. const DwarfCompileUnit *PrevCU; @@ -296,17 +289,7 @@ class DwarfDebug : public DebugHandlerBase { DwarfAccelTable AccelTypes; // Identify a debugger for "tuning" the debug info. - DebuggerKind DebuggerTuning; - - /// \defgroup DebuggerTuning Predicates to tune DWARF for a given debugger. - /// - /// Returns whether we are "tuning" for a given debugger. - /// Should be used only within the constructor, to set feature flags. 
- /// @{ - bool tuneForGDB() const { return DebuggerTuning == DebuggerKind::GDB; } - bool tuneForLLDB() const { return DebuggerTuning == DebuggerKind::LLDB; } - bool tuneForSCE() const { return DebuggerTuning == DebuggerKind::SCE; } - /// @} + DebuggerKind DebuggerTuning = DebuggerKind::Default; MCDwarfDwoLineTable *getDwoLineTable(const DwarfCompileUnit &); @@ -314,7 +297,7 @@ class DwarfDebug : public DebugHandlerBase { return InfoHolder.getUnits(); } - typedef DbgValueHistoryMap::InlinedVariable InlinedVariable; + using InlinedVariable = DbgValueHistoryMap::InlinedVariable; void ensureAbstractVariableIsCreated(DwarfCompileUnit &CU, InlinedVariable Var, const MDNode *Scope); @@ -358,21 +341,12 @@ class DwarfDebug : public DebugHandlerBase { /// Emit type dies into a hashed accelerator table. void emitAccelTypes(); - /// Emit visible names into a debug pubnames section. - /// \param GnuStyle determines whether or not we want to emit - /// additional information into the table ala newer gcc for gdb - /// index. - void emitDebugPubNames(bool GnuStyle = false); + /// Emit visible names and types into debug pubnames and pubtypes sections. + void emitDebugPubSections(); - /// Emit visible types into a debug pubtypes section. - /// \param GnuStyle determines whether or not we want to emit - /// additional information into the table ala newer gcc for gdb - /// index. - void emitDebugPubTypes(bool GnuStyle = false); - - void emitDebugPubSection( - bool GnuStyle, MCSection *PSec, StringRef Name, - const StringMap<const DIE *> &(DwarfCompileUnit::*Accessor)() const); + void emitDebugPubSection(bool GnuStyle, StringRef Name, + DwarfCompileUnit *TheU, + const StringMap<const DIE *> &Globals); /// Emit null-terminated strings into a debug str section. void emitDebugStr(); @@ -561,11 +535,19 @@ public: /// going to be null. 
bool isLexicalScopeDIENull(LexicalScope *Scope); - bool hasDwarfPubSections(bool includeMinimalInlineScopes) const; - /// Find the matching DwarfCompileUnit for the given CU DIE. DwarfCompileUnit *lookupCU(const DIE *Die) { return CUDieMap.lookup(Die); } + + /// \defgroup DebuggerTuning Predicates to tune DWARF for a given debugger. + /// + /// Returns whether we are "tuning" for a given debugger. + /// @{ + bool tuneForGDB() const { return DebuggerTuning == DebuggerKind::GDB; } + bool tuneForLLDB() const { return DebuggerTuning == DebuggerKind::LLDB; } + bool tuneForSCE() const { return DebuggerTuning == DebuggerKind::SCE; } + /// @} }; -} // End of namespace llvm -#endif +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index 3a8568cf39ae..68d25fe37b43 100644 --- a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/DwarfExpression.cpp - Dwarf Debug Framework ----------===// +//===- llvm/CodeGen/DwarfExpression.cpp - Dwarf Debug Framework -----------===// // // The LLVM Compiler Infrastructure // @@ -12,13 +12,15 @@ //===----------------------------------------------------------------------===// #include "DwarfExpression.h" -#include "DwarfDebug.h" +#include "llvm/ADT/APInt.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/BinaryFormat/Dwarf.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/Support/ErrorHandling.h" +#include <algorithm> +#include <cassert> +#include <cstdint> using namespace llvm; @@ -128,6 +130,8 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI, unsigned Size = TRI.getSubRegIdxSize(Idx); unsigned 
Offset = TRI.getSubRegIdxOffset(Idx); Reg = TRI.getDwarfRegNum(*SR, false); + if (Reg < 0) + continue; // Intersection between the bits we already emitted and the bits // covered by this subregister. @@ -136,14 +140,14 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI, // If this sub-register has a DWARF number and we haven't covered // its range, emit a DWARF piece for it. - if (Reg >= 0 && CurSubReg.test(Coverage)) { + if (CurSubReg.test(Coverage)) { // Emit a piece for any gap in the coverage. if (Offset > CurPos) DwarfRegs.push_back({-1, Offset - CurPos, nullptr}); DwarfRegs.push_back( {Reg, std::min<unsigned>(Size, MaxSize - Offset), "sub-register"}); if (Offset >= MaxSize) - break; + break; // Mark it as emitted. Coverage.set(Offset, Offset + Size); @@ -336,9 +340,10 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor, break; case dwarf::DW_OP_plus: case dwarf::DW_OP_minus: + case dwarf::DW_OP_mul: emitOp(Op->getOp()); break; - case dwarf::DW_OP_deref: { + case dwarf::DW_OP_deref: assert(LocationKind != Register); if (LocationKind != Memory && isMemoryLocation(ExprCursor)) // Turning this into a memory location description makes the deref @@ -347,7 +352,6 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor, else emitOp(dwarf::DW_OP_deref); break; - } case dwarf::DW_OP_constu: assert(LocationKind != Register); emitOp(dwarf::DW_OP_constu); @@ -383,7 +387,6 @@ void DwarfExpression::maskSubRegister() { addAnd(Mask); } - void DwarfExpression::finalize() { assert(DwarfRegs.size() == 0 && "dwarf registers not emitted"); // Emit any outstanding DW_OP_piece operations to mask out subregisters. 
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.h b/lib/CodeGen/AsmPrinter/DwarfExpression.h index 728f8ad9225b..ea5cbc40ba35 100644 --- a/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/DwarfExpression.h - Dwarf Compile Unit ---*- C++ -*--===// +//===- llvm/CodeGen/DwarfExpression.h - Dwarf Compile Unit ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -14,21 +14,29 @@ #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXPRESSION_H #define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXPRESSION_H -#include "llvm/IR/DebugInfo.h" -#include "llvm/Support/DataTypes.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include <cassert> +#include <cstdint> +#include <iterator> namespace llvm { class AsmPrinter; +class APInt; class ByteStreamer; -class TargetRegisterInfo; class DwarfUnit; class DIELoc; +class TargetRegisterInfo; /// Holds a DIExpression and keeps track of how many operands have been consumed /// so far. class DIExpressionCursor { DIExpression::expr_op_iterator Start, End; + public: DIExpressionCursor(const DIExpression *Expr) { if (!Expr) { @@ -42,8 +50,7 @@ public: DIExpressionCursor(ArrayRef<uint64_t> Expr) : Start(Expr.begin()), End(Expr.end()) {} - DIExpressionCursor(const DIExpressionCursor &C) - : Start(C.Start), End(C.End) {} + DIExpressionCursor(const DIExpressionCursor &) = default; /// Consume one operation. Optional<DIExpression::ExprOperand> take() { @@ -73,8 +80,10 @@ public: return *Next; } + /// Determine whether there are any operations left in this expression. operator bool() const { return Start != End; } + DIExpression::expr_op_iterator begin() const { return Start; } DIExpression::expr_op_iterator end() const { return End; } @@ -122,10 +131,13 @@ protected: /// Output a dwarf operand and an optional assembler comment. 
virtual void emitOp(uint8_t Op, const char *Comment = nullptr) = 0; + /// Emit a raw signed value. virtual void emitSigned(int64_t Value) = 0; + /// Emit a raw unsigned value. virtual void emitUnsigned(uint64_t Value) = 0; + /// Return whether the given machine register is the frame register in the /// current function. virtual bool isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) = 0; @@ -133,8 +145,10 @@ protected: /// Emit a DW_OP_reg operation. Note that this is only legal inside a DWARF /// register location description. void addReg(int DwarfReg, const char *Comment = nullptr); + /// Emit a DW_OP_breg operation. void addBReg(int DwarfReg, int Offset); + /// Emit DW_OP_fbreg <Offset>. void addFBReg(int Offset); @@ -156,7 +170,6 @@ protected: bool addMachineReg(const TargetRegisterInfo &TRI, unsigned MachineReg, unsigned MaxSize = ~1U); - /// Emit a DW_OP_piece or DW_OP_bit_piece operation for a variable fragment. /// \param OffsetInBits This is an optional offset into the location that /// is at the top of the DWARF stack. @@ -164,6 +177,7 @@ protected: /// Emit a shift-right dwarf operation. void addShr(unsigned ShiftBy); + /// Emit a bitwise and dwarf operation. void addAnd(unsigned Mask); @@ -181,6 +195,7 @@ protected: void addStackValue(); ~DwarfExpression() = default; + public: DwarfExpression(unsigned DwarfVersion) : DwarfVersion(DwarfVersion) {} @@ -189,8 +204,10 @@ public: /// Emit a signed constant. void addSignedConstant(int64_t Value); + /// Emit an unsigned constant. void addUnsignedConstant(uint64_t Value); + /// Emit an unsigned constant. void addUnsignedConstant(const APInt &Value); @@ -213,6 +230,7 @@ public: bool addMachineRegExpression(const TargetRegisterInfo &TRI, DIExpressionCursor &Expr, unsigned MachineReg, unsigned FragmentOffsetInBits = 0); + /// Emit all remaining operations in the DIExpressionCursor. 
/// /// \param FragmentOffsetInBits If this is one fragment out of multiple @@ -235,6 +253,7 @@ class DebugLocDwarfExpression final : public DwarfExpression { void emitUnsigned(uint64_t Value) override; bool isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) override; + public: DebugLocDwarfExpression(unsigned DwarfVersion, ByteStreamer &BS) : DwarfExpression(DwarfVersion), BS(BS) {} @@ -253,11 +272,13 @@ const AsmPrinter &AP; unsigned MachineReg) override; public: DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, DIELoc &DIE); + DIELoc *finalize() { DwarfExpression::finalize(); return &DIE; } }; -} -#endif +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXPRESSION_H diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/lib/CodeGen/AsmPrinter/DwarfFile.cpp index 595f1d91c4bf..3c04c969192d 100644 --- a/lib/CodeGen/AsmPrinter/DwarfFile.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfFile.cpp @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/DwarfFile.cpp - Dwarf Debug Framework ----------------===// +//===- llvm/CodeGen/DwarfFile.cpp - Dwarf Debug Framework -----------------===// // // The LLVM Compiler Infrastructure // @@ -11,13 +11,16 @@ #include "DwarfCompileUnit.h" #include "DwarfDebug.h" #include "DwarfUnit.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/IR/DataLayout.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DIE.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/Support/LEB128.h" -#include "llvm/Target/TargetLoweringObjectFile.h" +#include <algorithm> +#include <cstdint> + +using namespace llvm; -namespace llvm { DwarfFile::DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA) : Asm(AP), Abbrevs(AbbrevAllocator), StrPool(DA, *Asm, Pref) {} @@ -113,4 +116,3 @@ bool DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) { Vars.push_back(Var); return true; } -} diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.h 
b/lib/CodeGen/AsmPrinter/DwarfFile.h index 54924e9806ed..167ca13c19c1 100644 --- a/lib/CodeGen/AsmPrinter/DwarfFile.h +++ b/lib/CodeGen/AsmPrinter/DwarfFile.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/DwarfFile.h - Dwarf Debug Framework -------*- C++ -*--===// +//===- llvm/CodeGen/DwarfFile.h - Dwarf Debug Framework ---------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -10,30 +10,25 @@ #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFFILE_H #define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFFILE_H -#include "AddressPool.h" #include "DwarfStringPool.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/DIE.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/Allocator.h" #include <memory> +#include <utility> namespace llvm { + class AsmPrinter; class DbgVariable; class DwarfCompileUnit; class DwarfUnit; -class DIEAbbrev; -class MCSymbol; -class DIE; class LexicalScope; -class StringRef; -class DwarfDebug; class MCSection; -class MDNode; + class DwarfFile { // Target of Dwarf emission, used for sizing of abbreviations. 
AsmPrinter *Asm; @@ -106,6 +101,7 @@ public: DenseMap<const MDNode *, DIE *> &getAbstractSPDies() { return AbstractSPDies; } + DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> &getAbstractVariables() { return AbstractVariables; } @@ -113,9 +109,12 @@ public: void insertDIE(const MDNode *TypeMD, DIE *Die) { DITypeNodeToDieMap.insert(std::make_pair(TypeMD, Die)); } + DIE *getDIE(const MDNode *TypeMD) { return DITypeNodeToDieMap.lookup(TypeMD); } }; -} -#endif + +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DWARFFILE_H diff --git a/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp index 2066f745e318..aa5f01e88933 100644 --- a/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/DwarfStringPool.cpp - Dwarf Debug Framework ----------===// +//===- llvm/CodeGen/DwarfStringPool.cpp - Dwarf Debug Framework -----------===// // // The LLVM Compiler Infrastructure // @@ -8,9 +8,14 @@ //===----------------------------------------------------------------------===// #include "DwarfStringPool.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCStreamer.h" +#include <cassert> +#include <utility> using namespace llvm; diff --git a/lib/CodeGen/AsmPrinter/DwarfStringPool.h b/lib/CodeGen/AsmPrinter/DwarfStringPool.h index 93a168485a54..1cac3b7c8432 100644 --- a/lib/CodeGen/AsmPrinter/DwarfStringPool.h +++ b/lib/CodeGen/AsmPrinter/DwarfStringPool.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/DwarfStringPool.h - Dwarf Debug Framework -*- C++ -*--===// +//===- llvm/CodeGen/DwarfStringPool.h - Dwarf Debug Framework ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -11,29 +11,28 @@ #define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFSTRINGPOOL_H #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" #include 
"llvm/CodeGen/DwarfStringPoolEntry.h" #include "llvm/Support/Allocator.h" -#include <utility> namespace llvm { class AsmPrinter; -class MCSymbol; class MCSection; -class StringRef; // Collection of strings for this unit and assorted symbols. // A String->Symbol mapping of strings used by indirect // references. class DwarfStringPool { - typedef DwarfStringPoolEntry EntryTy; + using EntryTy = DwarfStringPoolEntry; + StringMap<EntryTy, BumpPtrAllocator &> Pool; StringRef Prefix; unsigned NumBytes = 0; bool ShouldCreateSymbols; public: - typedef DwarfStringPoolEntryRef EntryRef; + using EntryRef = DwarfStringPoolEntryRef; DwarfStringPool(BumpPtrAllocator &A, AsmPrinter &Asm, StringRef Prefix); @@ -45,5 +44,7 @@ public: /// Get a reference to an entry in the string pool. EntryRef getEntry(AsmPrinter &Asm, StringRef Str); }; -} -#endif + +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DWARFSTRINGPOOL_H diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 4f4ebfc56297..911e46235781 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -22,6 +22,9 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalValue.h" @@ -33,9 +36,6 @@ #include "llvm/MC/MachineLocation.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <cassert> #include <cstdint> #include <string> @@ -473,11 +473,7 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, if (Location.isIndirect()) 
DwarfExpr.setMemoryLocationKind(); - SmallVector<uint64_t, 9> Ops; - if (Location.isIndirect() && Location.getOffset()) { - Ops.push_back(dwarf::DW_OP_plus_uconst); - Ops.push_back(Location.getOffset()); - } + SmallVector<uint64_t, 6> Ops; // If we started with a pointer to the __Block_byref... struct, then // the first thing we need to do is dereference the pointer (DW_OP_deref). if (isPointer) @@ -964,8 +960,9 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { // This is outside the DWARF spec, but GDB expects a DW_AT_containing_type // inside C++ composite types to point to the base class with the vtable. - if (auto *ContainingType = - dyn_cast_or_null<DICompositeType>(resolve(CTy->getVTableHolder()))) + // Rust uses DW_AT_containing_type to link a vtable to the type + // for which it was created. + if (auto *ContainingType = resolve(CTy->getVTableHolder())) addDIEEntry(Buffer, dwarf::DW_AT_containing_type, *getOrCreateTypeDIE(ContainingType)); diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp index e14d5be1177a..3cdab57bca70 100644 --- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -1,4 +1,4 @@ -//===-- CodeGen/AsmPrinter/EHStreamer.cpp - Exception Directive Streamer --===// +//===- CodeGen/AsmPrinter/EHStreamer.cpp - Exception Directive Streamer ---===// // // The LLVM Compiler Infrastructure // @@ -12,22 +12,34 @@ //===----------------------------------------------------------------------===// #include "EHStreamer.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Twine.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetLoweringObjectFile.h" +#include "llvm/IR/DataLayout.h" #include 
"llvm/IR/Function.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/LEB128.h" -#include "llvm/Target/TargetLoweringObjectFile.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <vector> using namespace llvm; EHStreamer::EHStreamer(AsmPrinter *A) : Asm(A), MMI(Asm->MMI) {} -EHStreamer::~EHStreamer() {} +EHStreamer::~EHStreamer() = default; /// How many leading type ids two landing pads have in common. unsigned EHStreamer::sharedTypeIDs(const LandingPadInfo *L, @@ -50,7 +62,6 @@ unsigned EHStreamer:: computeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, SmallVectorImpl<ActionEntry> &Actions, SmallVectorImpl<unsigned> &FirstActions) { - // The action table follows the call-site table in the LSDA. The individual // records are of two types: // @@ -478,13 +489,14 @@ void EHStreamer::emitExceptionTable() { sizeof(int8_t) + // TType format (HaveTTData ? TTypeBaseOffsetSize : 0) + // TType base offset size TTypeBaseOffset; // TType base offset - unsigned SizeAlign = (4 - TotalSize) & 3; + unsigned PadBytes = (4 - TotalSize) & 3; if (HaveTTData) { // Account for any extra padding that will be added to the call site table // length. - Asm->EmitULEB128(TTypeBaseOffset, "@TType base offset", SizeAlign); - SizeAlign = 0; + Asm->EmitPaddedULEB128(TTypeBaseOffset, TTypeBaseOffsetSize + PadBytes, + "@TType base offset"); + PadBytes = 0; } bool VerboseAsm = Asm->OutStreamer->isVerboseAsm(); @@ -494,7 +506,9 @@ void EHStreamer::emitExceptionTable() { Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site"); // Add extra padding if it wasn't added to the TType base offset. 
- Asm->EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign); + Asm->EmitPaddedULEB128(CallSiteTableLength, + CallSiteTableLengthSize + PadBytes, + "Call site table length"); // Emit the landing pad site information. unsigned idx = 0; @@ -547,7 +561,9 @@ void EHStreamer::emitExceptionTable() { Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site"); // Add extra padding if it wasn't added to the TType base offset. - Asm->EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign); + Asm->EmitPaddedULEB128(CallSiteTableLength, + CallSiteTableLengthSize + PadBytes, + "Call site table length"); unsigned Entry = 0; for (SmallVectorImpl<CallSiteEntry>::const_iterator diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.h b/lib/CodeGen/AsmPrinter/EHStreamer.h index 080fdd14b467..7962b761d8de 100644 --- a/lib/CodeGen/AsmPrinter/EHStreamer.h +++ b/lib/CodeGen/AsmPrinter/EHStreamer.h @@ -1,4 +1,4 @@ -//===-- EHStreamer.h - Exception Handling Directive Streamer ---*- C++ -*--===// +//===- EHStreamer.h - Exception Handling Directive Streamer -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -16,17 +16,16 @@ #include "AsmPrinterHandler.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/Support/Compiler.h" namespace llvm { + +class AsmPrinter; struct LandingPadInfo; -class MachineModuleInfo; class MachineInstr; -class MachineFunction; +class MachineModuleInfo; class MCSymbol; -class MCSymbolRefExpr; - -template <typename T> -class SmallVectorImpl; +template <typename T> class SmallVectorImpl; /// Emits exception handling directives. class LLVM_LIBRARY_VISIBILITY EHStreamer : public AsmPrinterHandler { @@ -45,11 +44,12 @@ protected: struct PadRange { // The index of the landing pad. unsigned PadIndex; + // The index of the begin and end labels in the landing pad's label lists. 
unsigned RangeIndex; }; - typedef DenseMap<MCSymbol *, PadRange> RangeMapType; + using RangeMapType = DenseMap<MCSymbol *, PadRange>; /// Structure describing an entry in the actions table. struct ActionEntry { @@ -66,6 +66,7 @@ protected: // LPad contains the landing pad start labels. const LandingPadInfo *LPad; // Null indicates that there is no landing pad. + unsigned Action; }; @@ -131,7 +132,7 @@ public: /// `false' otherwise. static bool callToNoUnwindFunction(const MachineInstr *MI); }; -} -#endif +} // end namespace llvm +#endif // LLVM_LIB_CODEGEN_ASMPRINTER_EHSTREAMER_H diff --git a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp index c5795559fb7d..e459c02c9a6e 100644 --- a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/GCs.h" +#include "llvm/CodeGen/TargetLoweringObjectFile.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" @@ -26,7 +27,6 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Target/TargetLoweringObjectFile.h" using namespace llvm; diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index 035f1a0063aa..e0cc241dd23f 100644 --- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -11,13 +11,14 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/CodeGen/GCs.h" +#include "llvm/CodeGen/TargetLoweringObjectFile.h" #include "llvm/IR/DataLayout.h" #include 
"llvm/IR/Function.h" #include "llvm/IR/Mangler.h" @@ -26,7 +27,6 @@ #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include <cctype> #include <cstddef> #include <cstdint> diff --git a/lib/CodeGen/AsmPrinter/WinException.cpp b/lib/CodeGen/AsmPrinter/WinException.cpp index 5d485f213573..a6a8e84a949f 100644 --- a/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/lib/CodeGen/AsmPrinter/WinException.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "WinException.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/COFF.h" #include "llvm/BinaryFormat/Dwarf.h" @@ -20,6 +19,10 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Mangler.h" @@ -30,15 +33,9 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCWin64EH.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; WinException::WinException(AsmPrinter *A) : EHStreamer(A) { @@ -66,7 +63,7 @@ void WinException::beginFunction(const MachineFunction *MF) { bool hasLandingPads = !MF->getLandingPads().empty(); bool hasEHFunclets = MF->hasEHFunclets(); - const Function *F = MF->getFunction(); 
+ const Function &F = MF->getFunction(); shouldEmitMoves = Asm->needsSEHMoves() && MF->hasWinCFI(); @@ -75,14 +72,14 @@ void WinException::beginFunction(const MachineFunction *MF) { EHPersonality Per = EHPersonality::Unknown; const Function *PerFn = nullptr; - if (F->hasPersonalityFn()) { - PerFn = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts()); + if (F.hasPersonalityFn()) { + PerFn = dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts()); Per = classifyEHPersonality(PerFn); } - bool forceEmitPersonality = F->hasPersonalityFn() && + bool forceEmitPersonality = F.hasPersonalityFn() && !isNoOpWithoutInvoke(Per) && - F->needsUnwindTableEntry(); + F.needsUnwindTableEntry(); shouldEmitPersonality = forceEmitPersonality || ((hasLandingPads || hasEHFunclets) && @@ -101,7 +98,7 @@ void WinException::beginFunction(const MachineFunction *MF) { // functions may still refer to it. const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo(); StringRef FLinkageName = - GlobalValue::dropLLVMManglingEscape(MF->getFunction()->getName()); + GlobalValue::dropLLVMManglingEscape(MF->getFunction().getName()); emitEHRegistrationOffsetLabel(FuncInfo, FLinkageName); } shouldEmitLSDA = hasEHFunclets; @@ -118,10 +115,10 @@ void WinException::endFunction(const MachineFunction *MF) { if (!shouldEmitPersonality && !shouldEmitMoves && !shouldEmitLSDA) return; - const Function *F = MF->getFunction(); + const Function &F = MF->getFunction(); EHPersonality Per = EHPersonality::Unknown; - if (F->hasPersonalityFn()) - Per = classifyEHPersonality(F->getPersonalityFn()->stripPointerCasts()); + if (F.hasPersonalityFn()) + Per = classifyEHPersonality(F.getPersonalityFn()->stripPointerCasts()); // Get rid of any dead landing pads if we're not using funclets. In funclet // schemes, the landing pad is not actually reachable. 
It only exists so @@ -173,8 +170,8 @@ static MCSymbol *getMCSymbolForMBB(AsmPrinter *Asm, // Give catches and cleanups a name based off of their parent function and // their funclet entry block's number. const MachineFunction *MF = MBB->getParent(); - const Function *F = MF->getFunction(); - StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F->getName()); + const Function &F = MF->getFunction(); + StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F.getName()); MCContext &Ctx = MF->getContext(); StringRef HandlerPrefix = MBB->isCleanupFuncletEntry() ? "dtor" : "catch"; return Ctx.getOrCreateSymbol("?" + HandlerPrefix + "$" + @@ -186,7 +183,7 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB, MCSymbol *Sym) { CurrentFuncletEntry = &MBB; - const Function *F = Asm->MF->getFunction(); + const Function &F = Asm->MF->getFunction(); // If a symbol was not provided for the funclet, invent one. if (!Sym) { Sym = getMCSymbolForMBB(Asm, &MBB); @@ -201,7 +198,7 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB, // We want our funclet's entry point to be aligned such that no nops will be // present after the label. Asm->EmitAlignment(std::max(Asm->MF->getAlignment(), MBB.getAlignment()), - F); + &F); // Now that we've emitted the alignment directive, point at our funclet. Asm->OutStreamer->EmitLabel(Sym); @@ -218,8 +215,8 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB, const Function *PerFn = nullptr; // Determine which personality routine we are using for this funclet. 
- if (F->hasPersonalityFn()) - PerFn = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts()); + if (F.hasPersonalityFn()) + PerFn = dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts()); const MCSymbol *PersHandlerSym = TLOF.getCFIPersonalitySymbol(PerFn, Asm->TM, MMI); @@ -240,10 +237,10 @@ void WinException::endFunclet() { const MachineFunction *MF = Asm->MF; if (shouldEmitMoves || shouldEmitPersonality) { - const Function *F = MF->getFunction(); + const Function &F = MF->getFunction(); EHPersonality Per = EHPersonality::Unknown; - if (F->hasPersonalityFn()) - Per = classifyEHPersonality(F->getPersonalityFn()->stripPointerCasts()); + if (F.hasPersonalityFn()) + Per = classifyEHPersonality(F.getPersonalityFn()->stripPointerCasts()); // Emit an UNWIND_INFO struct describing the prologue. Asm->OutStreamer->EmitWinEHHandlerData(); @@ -252,7 +249,7 @@ void WinException::endFunclet() { !CurrentFuncletEntry->isCleanupFuncletEntry()) { // If this is a C++ catch funclet (or the parent function), // emit a reference to the LSDA for the parent function. - StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F->getName()); + StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F.getName()); MCSymbol *FuncInfoXData = Asm->OutContext.getOrCreateSymbol( Twine("$cppxdata$", FuncLinkageName)); Asm->OutStreamer->EmitValue(create32bitRef(FuncInfoXData), 4); @@ -536,7 +533,7 @@ void WinException::emitCSpecificHandlerTable(const MachineFunction *MF) { // Emit a label assignment with the SEH frame offset so we can use it for // llvm.x86.seh.recoverfp. 
StringRef FLinkageName = - GlobalValue::dropLLVMManglingEscape(MF->getFunction()->getName()); + GlobalValue::dropLLVMManglingEscape(MF->getFunction().getName()); MCSymbol *ParentFrameOffset = Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName); const MCExpr *MCOffset = @@ -631,11 +628,11 @@ void WinException::emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo, } void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { - const Function *F = MF->getFunction(); + const Function &F = MF->getFunction(); auto &OS = *Asm->OutStreamer; const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo(); - StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F->getName()); + StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F.getName()); SmallVector<std::pair<const MCExpr *, int>, 4> IPToStateTable; MCSymbol *FuncInfoXData = nullptr; @@ -941,8 +938,8 @@ void WinException::emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo, /// indexed by state number instead of IP. 
void WinException::emitExceptHandlerTable(const MachineFunction *MF) { MCStreamer &OS = *Asm->OutStreamer; - const Function *F = MF->getFunction(); - StringRef FLinkageName = GlobalValue::dropLLVMManglingEscape(F->getName()); + const Function &F = MF->getFunction(); + StringRef FLinkageName = GlobalValue::dropLLVMManglingEscape(F.getName()); bool VerboseAsm = OS.isVerboseAsm(); auto AddComment = [&](const Twine &Comment) { @@ -959,7 +956,7 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) { OS.EmitLabel(LSDALabel); const Function *Per = - dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts()); + dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts()); StringRef PerName = Per->getName(); int BaseState = -1; if (PerName == "_except_handler4") { diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp index aa9c8e94d08a..7042bc997223 100644 --- a/lib/CodeGen/AtomicExpandPass.cpp +++ b/lib/CodeGen/AtomicExpandPass.cpp @@ -1,4 +1,4 @@ -//===-- AtomicExpandPass.cpp - Expand atomic instructions -------===// +//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===// // // The LLVM Compiler Infrastructure // @@ -15,31 +15,54 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/AtomicExpandUtils.h" -#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" +#include 
"llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include <cassert> +#include <cstdint> +#include <iterator> using namespace llvm; #define DEBUG_TYPE "atomic-expand" namespace { + class AtomicExpand: public FunctionPass { - const TargetLowering *TLI; + const TargetLowering *TLI = nullptr; + public: static char ID; // Pass identification, replacement for typeid - AtomicExpand() : FunctionPass(ID), TLI(nullptr) { + + AtomicExpand() : FunctionPass(ID) { initializeAtomicExpandPass(*PassRegistry::getPassRegistry()); } @@ -92,39 +115,41 @@ namespace { llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, CreateCmpXchgInstFun CreateCmpXchg); }; -} + +} // end anonymous namespace char AtomicExpand::ID = 0; + char &llvm::AtomicExpandID = AtomicExpand::ID; + INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions", false, false) FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); } -namespace { // Helper functions to retrieve the size of atomic instructions. 
-unsigned getAtomicOpSize(LoadInst *LI) { +static unsigned getAtomicOpSize(LoadInst *LI) { const DataLayout &DL = LI->getModule()->getDataLayout(); return DL.getTypeStoreSize(LI->getType()); } -unsigned getAtomicOpSize(StoreInst *SI) { +static unsigned getAtomicOpSize(StoreInst *SI) { const DataLayout &DL = SI->getModule()->getDataLayout(); return DL.getTypeStoreSize(SI->getValueOperand()->getType()); } -unsigned getAtomicOpSize(AtomicRMWInst *RMWI) { +static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) { const DataLayout &DL = RMWI->getModule()->getDataLayout(); return DL.getTypeStoreSize(RMWI->getValOperand()->getType()); } -unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) { +static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) { const DataLayout &DL = CASI->getModule()->getDataLayout(); return DL.getTypeStoreSize(CASI->getCompareOperand()->getType()); } // Helper functions to retrieve the alignment of atomic instructions. -unsigned getAtomicOpAlign(LoadInst *LI) { +static unsigned getAtomicOpAlign(LoadInst *LI) { unsigned Align = LI->getAlignment(); // In the future, if this IR restriction is relaxed, we should // return DataLayout::getABITypeAlignment when there's no align @@ -133,7 +158,7 @@ unsigned getAtomicOpAlign(LoadInst *LI) { return Align; } -unsigned getAtomicOpAlign(StoreInst *SI) { +static unsigned getAtomicOpAlign(StoreInst *SI) { unsigned Align = SI->getAlignment(); // In the future, if this IR restriction is relaxed, we should // return DataLayout::getABITypeAlignment when there's no align @@ -142,7 +167,7 @@ unsigned getAtomicOpAlign(StoreInst *SI) { return Align; } -unsigned getAtomicOpAlign(AtomicRMWInst *RMWI) { +static unsigned getAtomicOpAlign(AtomicRMWInst *RMWI) { // TODO(PR27168): This instruction has no alignment attribute, but unlike the // default alignment for load/store, the default here is to assume // it has NATURAL alignment, not DataLayout-specified alignment. 
@@ -150,7 +175,7 @@ unsigned getAtomicOpAlign(AtomicRMWInst *RMWI) { return DL.getTypeStoreSize(RMWI->getValOperand()->getType()); } -unsigned getAtomicOpAlign(AtomicCmpXchgInst *CASI) { +static unsigned getAtomicOpAlign(AtomicCmpXchgInst *CASI) { // TODO(PR27168): same comment as above. const DataLayout &DL = CASI->getModule()->getDataLayout(); return DL.getTypeStoreSize(CASI->getCompareOperand()->getType()); @@ -160,14 +185,12 @@ unsigned getAtomicOpAlign(AtomicCmpXchgInst *CASI) { // and is of appropriate alignment, to be passed through for target // lowering. (Versus turning into a __atomic libcall) template <typename Inst> -bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) { +static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) { unsigned Size = getAtomicOpSize(I); unsigned Align = getAtomicOpAlign(I); return Align >= Size && Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8; } -} // end anonymous namespace - bool AtomicExpand::runOnFunction(Function &F) { auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); if (!TPC) @@ -320,16 +343,10 @@ bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) { auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order); auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order); - // The trailing fence is emitted before the instruction instead of after - // because there is no easy way of setting Builder insertion point after - // an instruction. So we must erase it from the BB, and insert it back - // in the right place. // We have a guard here because not every atomic operation generates a // trailing fence. 
- if (TrailingFence) { - TrailingFence->removeFromParent(); - TrailingFence->insertAfter(I); - } + if (TrailingFence) + TrailingFence->moveAfter(I); return (LeadingFence || TrailingFence); } @@ -562,6 +579,7 @@ struct PartwordMaskValues { Value *Mask; Value *Inv_Mask; }; + } // end anonymous namespace /// This is a helper function which builds instructions to provide @@ -580,7 +598,6 @@ struct PartwordMaskValues { /// include only the part that would've been loaded from Addr. /// /// Inv_Mask: The inverse of Mask. - static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I, Type *ValueType, Value *Addr, unsigned WordSize) { @@ -686,7 +703,6 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op, /// part of the value. void AtomicExpand::expandPartwordAtomicRMW( AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) { - assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg); AtomicOrdering MemOpOrder = AI->getOrdering(); @@ -943,7 +959,6 @@ AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst * return NewCI; } - bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { AtomicOrdering SuccessOrder = CI->getSuccessOrdering(); AtomicOrdering FailureOrder = CI->getFailureOrdering(); diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp index be93ff0dad29..d11f375b176e 100644 --- a/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -15,21 +15,20 @@ /// //===----------------------------------------------------------------------===// -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Analysis/TargetTransformInfoImpl.h" #include "llvm/CodeGen/BasicTTIImpl.h" -#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" -#include <utility> +#include 
"llvm/Target/TargetMachine.h" + using namespace llvm; // This flag is used by the template base class for BasicTTIImpl, and here to // provide a definition. cl::opt<unsigned> - llvm::PartialUnrollingThreshold("partial-unrolling-threshold", cl::init(0), - cl::desc("Threshold for partial unrolling"), - cl::Hidden); +llvm::PartialUnrollingThreshold("partial-unrolling-threshold", cl::init(0), + cl::desc("Threshold for partial unrolling"), + cl::Hidden); BasicTTIImpl::BasicTTIImpl(const TargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), diff --git a/lib/CodeGen/BranchCoalescing.cpp b/lib/CodeGen/BranchCoalescing.cpp deleted file mode 100644 index 2c41b597843c..000000000000 --- a/lib/CodeGen/BranchCoalescing.cpp +++ /dev/null @@ -1,758 +0,0 @@ -//===-- CoalesceBranches.cpp - Coalesce blocks with the same condition ---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Coalesce basic blocks guarded by the same branch condition into a single -/// basic block. 
-/// -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachinePostDominators.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/Support/Debug.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" - -using namespace llvm; - -#define DEBUG_TYPE "branch-coalescing" - -static cl::opt<cl::boolOrDefault> - EnableBranchCoalescing("enable-branch-coalesce", cl::Hidden, - cl::desc("enable coalescing of duplicate branches")); - -STATISTIC(NumBlocksCoalesced, "Number of blocks coalesced"); -STATISTIC(NumPHINotMoved, "Number of PHI Nodes that cannot be merged"); -STATISTIC(NumBlocksNotCoalesced, "Number of blocks not coalesced"); - -//===----------------------------------------------------------------------===// -// BranchCoalescing -//===----------------------------------------------------------------------===// -/// -/// Improve scheduling by coalescing branches that depend on the same condition. -/// This pass looks for blocks that are guarded by the same branch condition -/// and attempts to merge the blocks together. Such opportunities arise from -/// the expansion of select statements in the IR. 
-/// -/// For example, consider the following LLVM IR: -/// -/// %test = icmp eq i32 %x 0 -/// %tmp1 = select i1 %test, double %a, double 2.000000e-03 -/// %tmp2 = select i1 %test, double %b, double 5.000000e-03 -/// -/// This IR expands to the following machine code on PowerPC: -/// -/// BB#0: derived from LLVM BB %entry -/// Live Ins: %F1 %F3 %X6 -/// <SNIP1> -/// %vreg0<def> = COPY %F1; F8RC:%vreg0 -/// %vreg5<def> = CMPLWI %vreg4<kill>, 0; CRRC:%vreg5 GPRC:%vreg4 -/// %vreg8<def> = LXSDX %ZERO8, %vreg7<kill>, %RM<imp-use>; -/// mem:LD8[ConstantPool] F8RC:%vreg8 G8RC:%vreg7 -/// BCC 76, %vreg5, <BB#2>; CRRC:%vreg5 -/// Successors according to CFG: BB#1(?%) BB#2(?%) -/// -/// BB#1: derived from LLVM BB %entry -/// Predecessors according to CFG: BB#0 -/// Successors according to CFG: BB#2(?%) -/// -/// BB#2: derived from LLVM BB %entry -/// Predecessors according to CFG: BB#0 BB#1 -/// %vreg9<def> = PHI %vreg8, <BB#1>, %vreg0, <BB#0>; -/// F8RC:%vreg9,%vreg8,%vreg0 -/// <SNIP2> -/// BCC 76, %vreg5, <BB#4>; CRRC:%vreg5 -/// Successors according to CFG: BB#3(?%) BB#4(?%) -/// -/// BB#3: derived from LLVM BB %entry -/// Predecessors according to CFG: BB#2 -/// Successors according to CFG: BB#4(?%) -/// -/// BB#4: derived from LLVM BB %entry -/// Predecessors according to CFG: BB#2 BB#3 -/// %vreg13<def> = PHI %vreg12, <BB#3>, %vreg2, <BB#2>; -/// F8RC:%vreg13,%vreg12,%vreg2 -/// <SNIP3> -/// BLR8 %LR8<imp-use>, %RM<imp-use>, %F1<imp-use> -/// -/// When this pattern is detected, branch coalescing will try to collapse -/// it by moving code in BB#2 to BB#0 and/or BB#4 and removing BB#3. 
-/// -/// If all conditions are meet, IR should collapse to: -/// -/// BB#0: derived from LLVM BB %entry -/// Live Ins: %F1 %F3 %X6 -/// <SNIP1> -/// %vreg0<def> = COPY %F1; F8RC:%vreg0 -/// %vreg5<def> = CMPLWI %vreg4<kill>, 0; CRRC:%vreg5 GPRC:%vreg4 -/// %vreg8<def> = LXSDX %ZERO8, %vreg7<kill>, %RM<imp-use>; -/// mem:LD8[ConstantPool] F8RC:%vreg8 G8RC:%vreg7 -/// <SNIP2> -/// BCC 76, %vreg5, <BB#4>; CRRC:%vreg5 -/// Successors according to CFG: BB#1(0x2aaaaaaa / 0x80000000 = 33.33%) -/// BB#4(0x55555554 / 0x80000000 = 66.67%) -/// -/// BB#1: derived from LLVM BB %entry -/// Predecessors according to CFG: BB#0 -/// Successors according to CFG: BB#4(0x40000000 / 0x80000000 = 50.00%) -/// -/// BB#4: derived from LLVM BB %entry -/// Predecessors according to CFG: BB#0 BB#1 -/// %vreg9<def> = PHI %vreg8, <BB#1>, %vreg0, <BB#0>; -/// F8RC:%vreg9,%vreg8,%vreg0 -/// %vreg13<def> = PHI %vreg12, <BB#1>, %vreg2, <BB#0>; -/// F8RC:%vreg13,%vreg12,%vreg2 -/// <SNIP3> -/// BLR8 %LR8<imp-use>, %RM<imp-use>, %F1<imp-use> -/// -/// Branch Coalescing does not split blocks, it moves everything in the same -/// direction ensuring it does not break use/definition semantics. -/// -/// PHI nodes and its corresponding use instructions are moved to its successor -/// block if there are no uses within the successor block PHI nodes. PHI -/// node ordering cannot be assumed. -/// -/// Non-PHI can be moved up to the predecessor basic block or down to the -/// successor basic block following any PHI instructions. Whether it moves -/// up or down depends on whether the register(s) defined in the instructions -/// are used in current block or in any PHI instructions at the beginning of -/// the successor block. 
- -namespace { - -class BranchCoalescing : public MachineFunctionPass { - struct CoalescingCandidateInfo { - MachineBasicBlock *BranchBlock; // Block containing the branch - MachineBasicBlock *BranchTargetBlock; // Block branched to - MachineBasicBlock *FallThroughBlock; // Fall-through if branch not taken - SmallVector<MachineOperand, 4> Cond; - bool MustMoveDown; - bool MustMoveUp; - - CoalescingCandidateInfo(); - void clear(); - }; - - MachineDominatorTree *MDT; - MachinePostDominatorTree *MPDT; - const TargetInstrInfo *TII; - MachineRegisterInfo *MRI; - - void initialize(MachineFunction &F); - bool canCoalesceBranch(CoalescingCandidateInfo &Cand); - bool identicalOperands(ArrayRef<MachineOperand> OperandList1, - ArrayRef<MachineOperand> OperandList2) const; - bool validateCandidates(CoalescingCandidateInfo &SourceRegion, - CoalescingCandidateInfo &TargetRegion) const; - - static bool isBranchCoalescingEnabled() { - return EnableBranchCoalescing == cl::BOU_TRUE; - } - -public: - static char ID; - - BranchCoalescing() : MachineFunctionPass(ID) { - initializeBranchCoalescingPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<MachineDominatorTree>(); - AU.addRequired<MachinePostDominatorTree>(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - StringRef getPassName() const override { return "Branch Coalescing"; } - - bool mergeCandidates(CoalescingCandidateInfo &SourceRegion, - CoalescingCandidateInfo &TargetRegion); - bool canMoveToBeginning(const MachineInstr &MI, - const MachineBasicBlock &MBB) const; - bool canMoveToEnd(const MachineInstr &MI, - const MachineBasicBlock &MBB) const; - bool canMerge(CoalescingCandidateInfo &SourceRegion, - CoalescingCandidateInfo &TargetRegion) const; - void moveAndUpdatePHIs(MachineBasicBlock *SourceRegionMBB, - MachineBasicBlock *TargetRegionMBB); - bool runOnMachineFunction(MachineFunction &MF) override; -}; -} // End anonymous namespace. 
- -char BranchCoalescing::ID = 0; -char &llvm::BranchCoalescingID = BranchCoalescing::ID; - -INITIALIZE_PASS_BEGIN(BranchCoalescing, DEBUG_TYPE, - "Branch Coalescing", false, false) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) -INITIALIZE_PASS_END(BranchCoalescing, DEBUG_TYPE, "Branch Coalescing", - false, false) - -BranchCoalescing::CoalescingCandidateInfo::CoalescingCandidateInfo() - : BranchBlock(nullptr), BranchTargetBlock(nullptr), - FallThroughBlock(nullptr), MustMoveDown(false), MustMoveUp(false) {} - -void BranchCoalescing::CoalescingCandidateInfo::clear() { - BranchBlock = nullptr; - BranchTargetBlock = nullptr; - FallThroughBlock = nullptr; - Cond.clear(); - MustMoveDown = false; - MustMoveUp = false; -} - -void BranchCoalescing::initialize(MachineFunction &MF) { - MDT = &getAnalysis<MachineDominatorTree>(); - MPDT = &getAnalysis<MachinePostDominatorTree>(); - TII = MF.getSubtarget().getInstrInfo(); - MRI = &MF.getRegInfo(); -} - -/// -/// Analyze the branch statement to determine if it can be coalesced. This -/// method analyses the branch statement for the given candidate to determine -/// if it can be coalesced. If the branch can be coalesced, then the -/// BranchTargetBlock and the FallThroughBlock are recorded in the specified -/// Candidate. 
-/// -///\param[in,out] Cand The coalescing candidate to analyze -///\return true if and only if the branch can be coalesced, false otherwise -/// -bool BranchCoalescing::canCoalesceBranch(CoalescingCandidateInfo &Cand) { - DEBUG(dbgs() << "Determine if branch block " << Cand.BranchBlock->getNumber() - << " can be coalesced:"); - MachineBasicBlock *FalseMBB = nullptr; - - if (TII->analyzeBranch(*Cand.BranchBlock, Cand.BranchTargetBlock, FalseMBB, - Cand.Cond)) { - DEBUG(dbgs() << "TII unable to Analyze Branch - skip\n"); - return false; - } - - for (auto &I : Cand.BranchBlock->terminators()) { - DEBUG(dbgs() << "Looking at terminator : " << I << "\n"); - if (!I.isBranch()) - continue; - - if (I.getNumOperands() != I.getNumExplicitOperands()) { - DEBUG(dbgs() << "Terminator contains implicit operands - skip : " << I - << "\n"); - return false; - } - } - - if (Cand.BranchBlock->isEHPad() || Cand.BranchBlock->hasEHPadSuccessor()) { - DEBUG(dbgs() << "EH Pad - skip\n"); - return false; - } - - // For now only consider triangles (i.e, BranchTargetBlock is set, - // FalseMBB is null, and BranchTargetBlock is a successor to BranchBlock) - if (!Cand.BranchTargetBlock || FalseMBB || - !Cand.BranchBlock->isSuccessor(Cand.BranchTargetBlock)) { - DEBUG(dbgs() << "Does not form a triangle - skip\n"); - return false; - } - - // Ensure there are only two successors - if (Cand.BranchBlock->succ_size() != 2) { - DEBUG(dbgs() << "Does not have 2 successors - skip\n"); - return false; - } - - // Sanity check - the block must be able to fall through - assert(Cand.BranchBlock->canFallThrough() && - "Expecting the block to fall through!"); - - // We have already ensured there are exactly two successors to - // BranchBlock and that BranchTargetBlock is a successor to BranchBlock. - // Ensure the single fall though block is empty. - MachineBasicBlock *Succ = - (*Cand.BranchBlock->succ_begin() == Cand.BranchTargetBlock) - ? 
*Cand.BranchBlock->succ_rbegin() - : *Cand.BranchBlock->succ_begin(); - - assert(Succ && "Expecting a valid fall-through block\n"); - - if (!Succ->empty()) { - DEBUG(dbgs() << "Fall-through block contains code -- skip\n"); - return false; - } - - if (!Succ->isSuccessor(Cand.BranchTargetBlock)) { - DEBUG(dbgs() - << "Successor of fall through block is not branch taken block\n"); - return false; - } - - Cand.FallThroughBlock = Succ; - DEBUG(dbgs() << "Valid Candidate\n"); - return true; -} - -/// -/// Determine if the two operand lists are identical -/// -/// \param[in] OpList1 operand list -/// \param[in] OpList2 operand list -/// \return true if and only if the operands lists are identical -/// -bool BranchCoalescing::identicalOperands( - ArrayRef<MachineOperand> OpList1, ArrayRef<MachineOperand> OpList2) const { - - if (OpList1.size() != OpList2.size()) { - DEBUG(dbgs() << "Operand list is different size\n"); - return false; - } - - for (unsigned i = 0; i < OpList1.size(); ++i) { - const MachineOperand &Op1 = OpList1[i]; - const MachineOperand &Op2 = OpList2[i]; - - DEBUG(dbgs() << "Op1: " << Op1 << "\n" - << "Op2: " << Op2 << "\n"); - - if (Op1.isIdenticalTo(Op2)) { - DEBUG(dbgs() << "Op1 and Op2 are identical!\n"); - continue; - } - - // If the operands are not identical, but are registers, check to see if the - // definition of the register produces the same value. If they produce the - // same value, consider them to be identical. 
- if (Op1.isReg() && Op2.isReg() && - TargetRegisterInfo::isVirtualRegister(Op1.getReg()) && - TargetRegisterInfo::isVirtualRegister(Op2.getReg())) { - MachineInstr *Op1Def = MRI->getVRegDef(Op1.getReg()); - MachineInstr *Op2Def = MRI->getVRegDef(Op2.getReg()); - if (TII->produceSameValue(*Op1Def, *Op2Def, MRI)) { - DEBUG(dbgs() << "Op1Def: " << *Op1Def << " and " << *Op2Def - << " produce the same value!\n"); - } else { - DEBUG(dbgs() << "Operands produce different values\n"); - return false; - } - } else { - DEBUG(dbgs() << "The operands are not provably identical.\n"); - return false; - } - } - return true; -} - -/// -/// Moves ALL PHI instructions in SourceMBB to beginning of TargetMBB -/// and update them to refer to the new block. PHI node ordering -/// cannot be assumed so it does not matter where the PHI instructions -/// are moved to in TargetMBB. -/// -/// \param[in] SourceMBB block to move PHI instructions from -/// \param[in] TargetMBB block to move PHI instructions to -/// -void BranchCoalescing::moveAndUpdatePHIs(MachineBasicBlock *SourceMBB, - MachineBasicBlock *TargetMBB) { - - MachineBasicBlock::iterator MI = SourceMBB->begin(); - MachineBasicBlock::iterator ME = SourceMBB->getFirstNonPHI(); - - if (MI == ME) { - DEBUG(dbgs() << "SourceMBB contains no PHI instructions.\n"); - return; - } - - // Update all PHI instructions in SourceMBB and move to top of TargetMBB - for (MachineBasicBlock::iterator Iter = MI; Iter != ME; Iter++) { - MachineInstr &PHIInst = *Iter; - for (unsigned i = 2, e = PHIInst.getNumOperands() + 1; i != e; i += 2) { - MachineOperand &MO = PHIInst.getOperand(i); - if (MO.getMBB() == SourceMBB) - MO.setMBB(TargetMBB); - } - } - TargetMBB->splice(TargetMBB->begin(), SourceMBB, MI, ME); -} - -/// -/// This function checks if MI can be moved to the beginning of the TargetMBB -/// following PHI instructions. A MI instruction can be moved to beginning of -/// the TargetMBB if there are no uses of it within the TargetMBB PHI nodes. 
-/// -/// \param[in] MI the machine instruction to move. -/// \param[in] TargetMBB the machine basic block to move to -/// \return true if it is safe to move MI to beginning of TargetMBB, -/// false otherwise. -/// -bool BranchCoalescing::canMoveToBeginning(const MachineInstr &MI, - const MachineBasicBlock &TargetMBB - ) const { - - DEBUG(dbgs() << "Checking if " << MI << " can move to beginning of " - << TargetMBB.getNumber() << "\n"); - - for (auto &Def : MI.defs()) { // Looking at Def - for (auto &Use : MRI->use_instructions(Def.getReg())) { - if (Use.isPHI() && Use.getParent() == &TargetMBB) { - DEBUG(dbgs() << " *** used in a PHI -- cannot move ***\n"); - return false; - } - } - } - - DEBUG(dbgs() << " Safe to move to the beginning.\n"); - return true; -} - -/// -/// This function checks if MI can be moved to the end of the TargetMBB, -/// immediately before the first terminator. A MI instruction can be moved -/// to then end of the TargetMBB if no PHI node defines what MI uses within -/// it's own MBB. -/// -/// \param[in] MI the machine instruction to move. -/// \param[in] TargetMBB the machine basic block to move to -/// \return true if it is safe to move MI to end of TargetMBB, -/// false otherwise. 
-/// -bool BranchCoalescing::canMoveToEnd(const MachineInstr &MI, - const MachineBasicBlock &TargetMBB - ) const { - - DEBUG(dbgs() << "Checking if " << MI << " can move to end of " - << TargetMBB.getNumber() << "\n"); - - for (auto &Use : MI.uses()) { - if (Use.isReg() && TargetRegisterInfo::isVirtualRegister(Use.getReg())) { - MachineInstr *DefInst = MRI->getVRegDef(Use.getReg()); - if (DefInst->isPHI() && DefInst->getParent() == MI.getParent()) { - DEBUG(dbgs() << " *** Cannot move this instruction ***\n"); - return false; - } else { - DEBUG(dbgs() << " *** def is in another block -- safe to move!\n"); - } - } - } - - DEBUG(dbgs() << " Safe to move to the end.\n"); - return true; -} - -/// -/// This method checks to ensure the two coalescing candidates follows the -/// expected pattern required for coalescing. -/// -/// \param[in] SourceRegion The candidate to move statements from -/// \param[in] TargetRegion The candidate to move statements to -/// \return true if all instructions in SourceRegion.BranchBlock can be merged -/// into a block in TargetRegion; false otherwise. -/// -bool BranchCoalescing::validateCandidates( - CoalescingCandidateInfo &SourceRegion, - CoalescingCandidateInfo &TargetRegion) const { - - if (TargetRegion.BranchTargetBlock != SourceRegion.BranchBlock) - llvm_unreachable("Expecting SourceRegion to immediately follow TargetRegion"); - else if (!MDT->dominates(TargetRegion.BranchBlock, SourceRegion.BranchBlock)) - llvm_unreachable("Expecting TargetRegion to dominate SourceRegion"); - else if (!MPDT->dominates(SourceRegion.BranchBlock, TargetRegion.BranchBlock)) - llvm_unreachable("Expecting SourceRegion to post-dominate TargetRegion"); - else if (!TargetRegion.FallThroughBlock->empty() || - !SourceRegion.FallThroughBlock->empty()) - llvm_unreachable("Expecting fall-through blocks to be empty"); - - return true; -} - -/// -/// This method determines whether the two coalescing candidates can be merged. 
-/// In order to be merged, all instructions must be able to -/// 1. Move to the beginning of the SourceRegion.BranchTargetBlock; -/// 2. Move to the end of the TargetRegion.BranchBlock. -/// Merging involves moving the instructions in the -/// TargetRegion.BranchTargetBlock (also SourceRegion.BranchBlock). -/// -/// This function first try to move instructions from the -/// TargetRegion.BranchTargetBlock down, to the beginning of the -/// SourceRegion.BranchTargetBlock. This is not possible if any register defined -/// in TargetRegion.BranchTargetBlock is used in a PHI node in the -/// SourceRegion.BranchTargetBlock. In this case, check whether the statement -/// can be moved up, to the end of the TargetRegion.BranchBlock (immediately -/// before the branch statement). If it cannot move, then these blocks cannot -/// be merged. -/// -/// Note that there is no analysis for moving instructions past the fall-through -/// blocks because they are confirmed to be empty. An assert is thrown if they -/// are not. -/// -/// \param[in] SourceRegion The candidate to move statements from -/// \param[in] TargetRegion The candidate to move statements to -/// \return true if all instructions in SourceRegion.BranchBlock can be merged -/// into a block in TargetRegion, false otherwise. -/// -bool BranchCoalescing::canMerge(CoalescingCandidateInfo &SourceRegion, - CoalescingCandidateInfo &TargetRegion) const { - if (!validateCandidates(SourceRegion, TargetRegion)) - return false; - - // Walk through PHI nodes first and see if they force the merge into the - // SourceRegion.BranchTargetBlock. 
- for (MachineBasicBlock::iterator - I = SourceRegion.BranchBlock->instr_begin(), - E = SourceRegion.BranchBlock->getFirstNonPHI(); - I != E; ++I) { - for (auto &Def : I->defs()) - for (auto &Use : MRI->use_instructions(Def.getReg())) { - if (Use.isPHI() && Use.getParent() == SourceRegion.BranchTargetBlock) { - DEBUG(dbgs() << "PHI " << *I << " defines register used in another " - "PHI within branch target block -- can't merge\n"); - NumPHINotMoved++; - return false; - } - if (Use.getParent() == SourceRegion.BranchBlock) { - DEBUG(dbgs() << "PHI " << *I - << " defines register used in this " - "block -- all must move down\n"); - SourceRegion.MustMoveDown = true; - } - } - } - - // Walk through the MI to see if they should be merged into - // TargetRegion.BranchBlock (up) or SourceRegion.BranchTargetBlock (down) - for (MachineBasicBlock::iterator - I = SourceRegion.BranchBlock->getFirstNonPHI(), - E = SourceRegion.BranchBlock->end(); - I != E; ++I) { - if (!canMoveToBeginning(*I, *SourceRegion.BranchTargetBlock)) { - DEBUG(dbgs() << "Instruction " << *I - << " cannot move down - must move up!\n"); - SourceRegion.MustMoveUp = true; - } - if (!canMoveToEnd(*I, *TargetRegion.BranchBlock)) { - DEBUG(dbgs() << "Instruction " << *I - << " cannot move up - must move down!\n"); - SourceRegion.MustMoveDown = true; - } - } - - return (SourceRegion.MustMoveUp && SourceRegion.MustMoveDown) ? false : true; -} - -/// Merge the instructions from SourceRegion.BranchBlock, -/// SourceRegion.BranchTargetBlock, and SourceRegion.FallThroughBlock into -/// TargetRegion.BranchBlock, TargetRegion.BranchTargetBlock and -/// TargetRegion.FallThroughBlock respectively. -/// -/// The successors for blocks in TargetRegion will be updated to use the -/// successors from blocks in SourceRegion. Finally, the blocks in SourceRegion -/// will be removed from the function. -/// -/// A region consists of a BranchBlock, a FallThroughBlock, and a -/// BranchTargetBlock. 
Branch coalesce works on patterns where the -/// TargetRegion's BranchTargetBlock must also be the SourceRegions's -/// BranchBlock. -/// -/// Before mergeCandidates: -/// -/// +---------------------------+ -/// | TargetRegion.BranchBlock | -/// +---------------------------+ -/// / | -/// / +--------------------------------+ -/// | | TargetRegion.FallThroughBlock | -/// \ +--------------------------------+ -/// \ | -/// +----------------------------------+ -/// | TargetRegion.BranchTargetBlock | -/// | SourceRegion.BranchBlock | -/// +----------------------------------+ -/// / | -/// / +--------------------------------+ -/// | | SourceRegion.FallThroughBlock | -/// \ +--------------------------------+ -/// \ | -/// +----------------------------------+ -/// | SourceRegion.BranchTargetBlock | -/// +----------------------------------+ -/// -/// After mergeCandidates: -/// -/// +-----------------------------+ -/// | TargetRegion.BranchBlock | -/// | SourceRegion.BranchBlock | -/// +-----------------------------+ -/// / | -/// / +---------------------------------+ -/// | | TargetRegion.FallThroughBlock | -/// | | SourceRegion.FallThroughBlock | -/// \ +---------------------------------+ -/// \ | -/// +----------------------------------+ -/// | SourceRegion.BranchTargetBlock | -/// +----------------------------------+ -/// -/// \param[in] SourceRegion The candidate to move blocks from -/// \param[in] TargetRegion The candidate to move blocks to -/// -bool BranchCoalescing::mergeCandidates(CoalescingCandidateInfo &SourceRegion, - CoalescingCandidateInfo &TargetRegion) { - - if (SourceRegion.MustMoveUp && SourceRegion.MustMoveDown) { - llvm_unreachable("Cannot have both MustMoveDown and MustMoveUp set!"); - return false; - } - - if (!validateCandidates(SourceRegion, TargetRegion)) - return false; - - // Start the merging process by first handling the BranchBlock. 
- // Move any PHIs in SourceRegion.BranchBlock down to the branch-taken block - moveAndUpdatePHIs(SourceRegion.BranchBlock, SourceRegion.BranchTargetBlock); - - // Move remaining instructions in SourceRegion.BranchBlock into - // TargetRegion.BranchBlock - MachineBasicBlock::iterator firstInstr = - SourceRegion.BranchBlock->getFirstNonPHI(); - MachineBasicBlock::iterator lastInstr = - SourceRegion.BranchBlock->getFirstTerminator(); - - MachineBasicBlock *Source = SourceRegion.MustMoveDown - ? SourceRegion.BranchTargetBlock - : TargetRegion.BranchBlock; - - MachineBasicBlock::iterator Target = - SourceRegion.MustMoveDown - ? SourceRegion.BranchTargetBlock->getFirstNonPHI() - : TargetRegion.BranchBlock->getFirstTerminator(); - - Source->splice(Target, SourceRegion.BranchBlock, firstInstr, lastInstr); - - // Once PHI and instructions have been moved we need to clean up the - // control flow. - - // Remove SourceRegion.FallThroughBlock before transferring successors of - // SourceRegion.BranchBlock to TargetRegion.BranchBlock. - SourceRegion.BranchBlock->removeSuccessor(SourceRegion.FallThroughBlock); - TargetRegion.BranchBlock->transferSuccessorsAndUpdatePHIs( - SourceRegion.BranchBlock); - // Update branch in TargetRegion.BranchBlock to jump to - // SourceRegion.BranchTargetBlock - // In this case, TargetRegion.BranchTargetBlock == SourceRegion.BranchBlock. - TargetRegion.BranchBlock->ReplaceUsesOfBlockWith( - SourceRegion.BranchBlock, SourceRegion.BranchTargetBlock); - // Remove the branch statement(s) in SourceRegion.BranchBlock - MachineBasicBlock::iterator I = - SourceRegion.BranchBlock->terminators().begin(); - while (I != SourceRegion.BranchBlock->terminators().end()) { - MachineInstr &CurrInst = *I; - ++I; - if (CurrInst.isBranch()) - CurrInst.eraseFromParent(); - } - - // Fall-through block should be empty since this is part of the condition - // to coalesce the branches. 
- assert(TargetRegion.FallThroughBlock->empty() && - "FallThroughBlocks should be empty!"); - - // Transfer successor information and move PHIs down to the - // branch-taken block. - TargetRegion.FallThroughBlock->transferSuccessorsAndUpdatePHIs( - SourceRegion.FallThroughBlock); - TargetRegion.FallThroughBlock->removeSuccessor(SourceRegion.BranchBlock); - - // Remove the blocks from the function. - assert(SourceRegion.BranchBlock->empty() && - "Expecting branch block to be empty!"); - SourceRegion.BranchBlock->eraseFromParent(); - - assert(SourceRegion.FallThroughBlock->empty() && - "Expecting fall-through block to be empty!\n"); - SourceRegion.FallThroughBlock->eraseFromParent(); - - NumBlocksCoalesced++; - return true; -} - -bool BranchCoalescing::runOnMachineFunction(MachineFunction &MF) { - - if (skipFunction(*MF.getFunction()) || MF.empty() || - !isBranchCoalescingEnabled()) - return false; - - bool didSomething = false; - - DEBUG(dbgs() << "******** Branch Coalescing ********\n"); - initialize(MF); - - DEBUG(dbgs() << "Function: "; MF.dump(); dbgs() << "\n"); - - CoalescingCandidateInfo Cand1, Cand2; - // Walk over blocks and find candidates to merge - // Continue trying to merge with the first candidate found, as long as merging - // is successfull. 
- for (MachineBasicBlock &MBB : MF) { - bool MergedCandidates = false; - do { - MergedCandidates = false; - Cand1.clear(); - Cand2.clear(); - - Cand1.BranchBlock = &MBB; - - // If unable to coalesce the branch, then continue to next block - if (!canCoalesceBranch(Cand1)) - break; - - Cand2.BranchBlock = Cand1.BranchTargetBlock; - if (!canCoalesceBranch(Cand2)) - break; - - // Sanity check - // The branch-taken block of the second candidate should post-dominate the - // first candidate - assert(MPDT->dominates(Cand2.BranchTargetBlock, Cand1.BranchBlock) && - "Branch-taken block should post-dominate first candidate"); - - if (!identicalOperands(Cand1.Cond, Cand2.Cond)) { - DEBUG(dbgs() << "Blocks " << Cand1.BranchBlock->getNumber() << " and " - << Cand2.BranchBlock->getNumber() - << " have different branches\n"); - break; - } - if (!canMerge(Cand2, Cand1)) { - DEBUG(dbgs() << "Cannot merge blocks " << Cand1.BranchBlock->getNumber() - << " and " << Cand2.BranchBlock->getNumber() << "\n"); - NumBlocksNotCoalesced++; - continue; - } - DEBUG(dbgs() << "Merging blocks " << Cand1.BranchBlock->getNumber() - << " and " << Cand1.BranchTargetBlock->getNumber() << "\n"); - MergedCandidates = mergeCandidates(Cand2, Cand1); - if (MergedCandidates) - didSomething = true; - - DEBUG(dbgs() << "Function after merging: "; MF.dump(); dbgs() << "\n"); - } while (MergedCandidates); - } - -#ifndef NDEBUG - // Verify MF is still valid after branch coalescing - if (didSomething) - MF.verify(nullptr, "Error in code produced by branch coalescing"); -#endif // NDEBUG - - DEBUG(dbgs() << "Finished Branch Coalescing\n"); - return didSomething; -} diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 3c439e66944b..7f358a679366 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -19,27 +19,35 @@ #include "BranchFolding.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include 
"llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" +#include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/BlockFrequency.h" @@ -48,10 +56,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <cassert> #include <cstddef> #include <iterator> @@ -81,8 +86,8 @@ TailMergeThreshold("tail-merge-threshold", // TODO: This should be replaced with a target query. 
static cl::opt<unsigned> TailMergeSize("tail-merge-size", - cl::desc("Min number of instructions to consider tail merging"), - cl::init(3), cl::Hidden); + cl::desc("Min number of instructions to consider tail merging"), + cl::init(3), cl::Hidden); namespace { @@ -106,13 +111,14 @@ namespace { } // end anonymous namespace char BranchFolderPass::ID = 0; + char &llvm::BranchFolderPassID = BranchFolderPass::ID; INITIALIZE_PASS(BranchFolderPass, DEBUG_TYPE, "Control Flow Optimizer", false, false) bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>(); @@ -365,15 +371,37 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, return TailLen; } -void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, - MachineBasicBlock *NewDest) { - TII->ReplaceTailWithBranchTo(OldInst, NewDest); - +void BranchFolder::replaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, + MachineBasicBlock &NewDest) { if (UpdateLiveIns) { - NewDest->clearLiveIns(); - computeLiveIns(LiveRegs, *MRI, *NewDest); + // OldInst should always point to an instruction. + MachineBasicBlock &OldMBB = *OldInst->getParent(); + LiveRegs.clear(); + LiveRegs.addLiveOuts(OldMBB); + // Move backward to the place where will insert the jump. + MachineBasicBlock::iterator I = OldMBB.end(); + do { + --I; + LiveRegs.stepBackward(*I); + } while (I != OldInst); + + // Merging the tails may have switched some undef operand to non-undef ones. + // Add IMPLICIT_DEFS into OldMBB as necessary to have a definition of the + // register. 
+ for (MachineBasicBlock::RegisterMaskPair P : NewDest.liveins()) { + // We computed the liveins with computeLiveIn earlier and should only see + // full registers: + assert(P.LaneMask == LaneBitmask::getAll() && + "Can only handle full register."); + MCPhysReg Reg = P.PhysReg; + if (!LiveRegs.available(*MRI, Reg)) + continue; + DebugLoc DL; + BuildMI(OldMBB, OldInst, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Reg); + } } + TII->ReplaceTailWithBranchTo(OldInst, &NewDest); ++NumTailMerge; } @@ -408,7 +436,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, MBBFreqInfo.setBlockFreq(NewMBB, MBBFreqInfo.getBlockFreq(&CurMBB)); if (UpdateLiveIns) - computeLiveIns(LiveRegs, *MRI, *NewMBB); + computeAndAddLiveIns(LiveRegs, *NewMBB); // Add the new block to the funclet. const auto &FuncletI = FuncletMembership.find(&CurMBB); @@ -585,8 +613,8 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2); if (CommonTailLen == 0) return false; - DEBUG(dbgs() << "Common tail length of BB#" << MBB1->getNumber() - << " and BB#" << MBB2->getNumber() << " is " << CommonTailLen + DEBUG(dbgs() << "Common tail length of " << printMBBReference(*MBB1) + << " and " << printMBBReference(*MBB2) << " is " << CommonTailLen << '\n'); // It's almost always profitable to merge any number of non-terminator @@ -657,7 +685,7 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, // branch instruction, which is likely to be smaller than the 2 // instructions that would be deleted in the merge. 
MachineFunction *MF = MBB1->getParent(); - return EffectiveTailLen >= 2 && MF->getFunction()->optForSize() && + return EffectiveTailLen >= 2 && MF->getFunction().optForSize() && (I1 == MBB1->begin() || I2 == MBB2->begin()); } @@ -742,7 +770,7 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, SameTails[commonTailIndex].getTailStartPos(); MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock(); - DEBUG(dbgs() << "\nSplitting BB#" << MBB->getNumber() << ", size " + DEBUG(dbgs() << "\nSplitting " << printMBBReference(*MBB) << ", size " << maxCommonTailLength); // If the split block unconditionally falls-thru to SuccBB, it will be @@ -766,43 +794,6 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, return true; } -void BranchFolder::MergeCommonTailDebugLocs(unsigned commonTailIndex) { - MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock(); - - std::vector<MachineBasicBlock::iterator> NextCommonInsts(SameTails.size()); - for (unsigned int i = 0 ; i != SameTails.size() ; ++i) { - if (i != commonTailIndex) - NextCommonInsts[i] = SameTails[i].getTailStartPos(); - else { - assert(SameTails[i].getTailStartPos() == MBB->begin() && - "MBB is not a common tail only block"); - } - } - - for (auto &MI : *MBB) { - if (MI.isDebugValue()) - continue; - DebugLoc DL = MI.getDebugLoc(); - for (unsigned int i = 0 ; i < NextCommonInsts.size() ; i++) { - if (i == commonTailIndex) - continue; - - auto &Pos = NextCommonInsts[i]; - assert(Pos != SameTails[i].getBlock()->end() && - "Reached BB end within common tail"); - while (Pos->isDebugValue()) { - ++Pos; - assert(Pos != SameTails[i].getBlock()->end() && - "Reached BB end within common tail"); - } - assert(MI.isIdenticalTo(*Pos) && "Expected matching MIIs!"); - DL = DILocation::getMergedLocation(DL, Pos->getDebugLoc()); - NextCommonInsts[i] = ++Pos; - } - MI.setDebugLoc(DL); - } -} - static void mergeOperations(MachineBasicBlock::iterator MBBIStartPos, MachineBasicBlock 
&MBBCommon) { @@ -853,6 +844,67 @@ mergeOperations(MachineBasicBlock::iterator MBBIStartPos, } } +void BranchFolder::mergeCommonTails(unsigned commonTailIndex) { + MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock(); + + std::vector<MachineBasicBlock::iterator> NextCommonInsts(SameTails.size()); + for (unsigned int i = 0 ; i != SameTails.size() ; ++i) { + if (i != commonTailIndex) { + NextCommonInsts[i] = SameTails[i].getTailStartPos(); + mergeOperations(SameTails[i].getTailStartPos(), *MBB); + } else { + assert(SameTails[i].getTailStartPos() == MBB->begin() && + "MBB is not a common tail only block"); + } + } + + for (auto &MI : *MBB) { + if (MI.isDebugValue()) + continue; + DebugLoc DL = MI.getDebugLoc(); + for (unsigned int i = 0 ; i < NextCommonInsts.size() ; i++) { + if (i == commonTailIndex) + continue; + + auto &Pos = NextCommonInsts[i]; + assert(Pos != SameTails[i].getBlock()->end() && + "Reached BB end within common tail"); + while (Pos->isDebugValue()) { + ++Pos; + assert(Pos != SameTails[i].getBlock()->end() && + "Reached BB end within common tail"); + } + assert(MI.isIdenticalTo(*Pos) && "Expected matching MIIs!"); + DL = DILocation::getMergedLocation(DL, Pos->getDebugLoc()); + NextCommonInsts[i] = ++Pos; + } + MI.setDebugLoc(DL); + } + + if (UpdateLiveIns) { + LivePhysRegs NewLiveIns(*TRI); + computeLiveIns(NewLiveIns, *MBB); + + // The flag merging may lead to some register uses no longer using the + // <undef> flag, add IMPLICIT_DEFs in the predecessors as necessary. 
+ for (MachineBasicBlock *Pred : MBB->predecessors()) { + LiveRegs.init(*TRI); + LiveRegs.addLiveOuts(*Pred); + MachineBasicBlock::iterator InsertBefore = Pred->getFirstTerminator(); + for (unsigned Reg : NewLiveIns) { + if (!LiveRegs.available(*MRI, Reg)) + continue; + DebugLoc DL; + BuildMI(*Pred, InsertBefore, DL, TII->get(TargetOpcode::IMPLICIT_DEF), + Reg); + } + } + + MBB->clearLiveIns(); + addLiveIns(*MBB, NewLiveIns); + } +} + // See if any of the blocks in MergePotentials (which all have SuccBB as a // successor, or all have no successor if it is null) can be tail-merged. // If there is a successor, any blocks in MergePotentials that are not @@ -868,20 +920,17 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, bool MadeChange = false; DEBUG(dbgs() << "\nTryTailMergeBlocks: "; - for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) - dbgs() << "BB#" << MergePotentials[i].getBlock()->getNumber() - << (i == e-1 ? "" : ", "); - dbgs() << "\n"; - if (SuccBB) { - dbgs() << " with successor BB#" << SuccBB->getNumber() << '\n'; + for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) dbgs() + << printMBBReference(*MergePotentials[i].getBlock()) + << (i == e - 1 ? "" : ", "); + dbgs() << "\n"; if (SuccBB) { + dbgs() << " with successor " << printMBBReference(*SuccBB) << '\n'; if (PredBB) - dbgs() << " which has fall-through from BB#" - << PredBB->getNumber() << "\n"; - } - dbgs() << "Looking for common tails of at least " - << MinCommonTailLength << " instruction" - << (MinCommonTailLength == 1 ? "" : "s") << '\n'; - ); + dbgs() << " which has fall-through from " + << printMBBReference(*PredBB) << "\n"; + } dbgs() << "Looking for common tails of at least " + << MinCommonTailLength << " instruction" + << (MinCommonTailLength == 1 ? "" : "s") << '\n';); // Sort by hash value so that blocks with identical end sequences sort // together. 
@@ -955,22 +1004,21 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, // Recompute common tail MBB's edge weights and block frequency. setCommonTailEdgeWeights(*MBB); - // Merge debug locations across identical instructions for common tail. - MergeCommonTailDebugLocs(commonTailIndex); + // Merge debug locations, MMOs and undef flags across identical instructions + // for common tail. + mergeCommonTails(commonTailIndex); // MBB is common tail. Adjust all other BB's to jump to this one. // Traversal must be forwards so erases work. - DEBUG(dbgs() << "\nUsing common tail in BB#" << MBB->getNumber() + DEBUG(dbgs() << "\nUsing common tail in " << printMBBReference(*MBB) << " for "); for (unsigned int i=0, e = SameTails.size(); i != e; ++i) { if (commonTailIndex == i) continue; - DEBUG(dbgs() << "BB#" << SameTails[i].getBlock()->getNumber() - << (i == e-1 ? "" : ", ")); - // Merge operations (MMOs, undef flags) - mergeOperations(SameTails[i].getTailStartPos(), *MBB); + DEBUG(dbgs() << printMBBReference(*SameTails[i].getBlock()) + << (i == e - 1 ? "" : ", ")); // Hack the end off BB i, making it jump to BB commonTailIndex instead. - ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB); + replaceTailWithBranchTo(SameTails[i].getTailStartPos(), *MBB); // BB i is no longer a predecessor of SuccBB; remove it from the worklist. MergePotentials.erase(SameTails[i].getMPIter()); } @@ -1463,7 +1511,7 @@ ReoptimizeBlock: } if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 && - MF.getFunction()->optForSize()) { + MF.getFunction().optForSize()) { // Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch // direction, thereby defeating careful block placement and regressing // performance. Therefore, only consider this for optsize functions. 
@@ -1819,7 +1867,6 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, if (!PI->isSafeToMove(nullptr, DontMoveAcrossStore) || TII->isPredicated(*PI)) return MBB->end(); - // Find out what registers are live. Note this routine is ignoring other live // registers which are only used by instructions in successor blocks. for (const MachineOperand &MO : PI->operands()) { @@ -1921,7 +1968,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { // // BB2: // r1 = op2, ... - // = op3, r1<kill> + // = op3, killed r1 IsSafe = false; break; } diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h index 92681137e4c6..0f0952550137 100644 --- a/lib/CodeGen/BranchFolding.h +++ b/lib/CodeGen/BranchFolding.h @@ -1,4 +1,4 @@ -//===-- BranchFolding.h - Fold machine code branch instructions -*- C++ -*-===// +//===- BranchFolding.h - Fold machine code branch instructions --*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -10,20 +10,27 @@ #ifndef LLVM_LIB_CODEGEN_BRANCHFOLDING_H #define LLVM_LIB_CODEGEN_BRANCHFOLDING_H +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/Support/BlockFrequency.h" +#include "llvm/Support/Compiler.h" +#include <cstdint> #include <vector> namespace llvm { - class MachineBlockFrequencyInfo; - class MachineBranchProbabilityInfo; - class MachineFunction; - class MachineModuleInfo; - class MachineLoopInfo; - class TargetInstrInfo; - class TargetRegisterInfo; + +class BasicBlock; +class MachineBlockFrequencyInfo; +class MachineBranchProbabilityInfo; +class MachineFunction; +class MachineLoopInfo; +class MachineModuleInfo; +class MachineRegisterInfo; +class raw_ostream; +class TargetInstrInfo; +class TargetRegisterInfo; class LLVM_LIBRARY_VISIBILITY BranchFolder { public: @@ -49,6 +56,7 @@ namespace llvm { class MergePotentialsElt { unsigned Hash; MachineBasicBlock *Block; + 
public: MergePotentialsElt(unsigned h, MachineBasicBlock *b) : Hash(h), Block(b) {} @@ -62,7 +70,9 @@ namespace llvm { bool operator<(const MergePotentialsElt &) const; }; - typedef std::vector<MergePotentialsElt>::iterator MPIterator; + + using MPIterator = std::vector<MergePotentialsElt>::iterator; + std::vector<MergePotentialsElt> MergePotentials; SmallPtrSet<const MachineBasicBlock*, 2> TriedMerging; DenseMap<const MachineBasicBlock *, int> FuncletMembership; @@ -70,6 +80,7 @@ namespace llvm { class SameTailElt { MPIterator MPIter; MachineBasicBlock::iterator TailStartPos; + public: SameTailElt(MPIterator mp, MachineBasicBlock::iterator tsp) : MPIter(mp), TailStartPos(tsp) {} @@ -77,18 +88,23 @@ namespace llvm { MPIterator getMPIter() const { return MPIter; } + MergePotentialsElt &getMergePotentialsElt() const { return *getMPIter(); } + MachineBasicBlock::iterator getTailStartPos() const { return TailStartPos; } + unsigned getHash() const { return getMergePotentialsElt().getHash(); } + MachineBasicBlock *getBlock() const { return getMergePotentialsElt().getBlock(); } + bool tailIsWholeBlock() const { return TailStartPos == getBlock()->begin(); } @@ -96,6 +112,7 @@ namespace llvm { void setBlock(MachineBasicBlock *MBB) { getMergePotentialsElt().setBlock(MBB); } + void setTailStartPos(MachineBasicBlock::iterator Pos) { TailStartPos = Pos; } @@ -120,6 +137,7 @@ namespace llvm { class MBFIWrapper { public: MBFIWrapper(const MachineBlockFrequencyInfo &I) : MBFI(I) {} + BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const; void setBlockFreq(const MachineBasicBlock *MBB, BlockFrequency F); raw_ostream &printBlockFreq(raw_ostream &OS, @@ -146,8 +164,8 @@ namespace llvm { /// Delete the instruction OldInst and everything after it, replacing it /// with an unconditional branch to NewDest. 
- void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, - MachineBasicBlock *NewDest); + void replaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, + MachineBasicBlock &NewDest); /// Given a machine basic block and an iterator into it, split the MBB so /// that the part before the iterator falls into the part starting at the @@ -182,8 +200,8 @@ namespace llvm { unsigned &commonTailIndex); /// Create merged DebugLocs of identical instructions across SameTails and - /// assign it to the instruction in common tail. - void MergeCommonTailDebugLocs(unsigned commonTailIndex); + /// assign it to the instruction in common tail; merge MMOs and undef flags. + void mergeCommonTails(unsigned commonTailIndex); bool OptimizeBranches(MachineFunction &MF); @@ -203,6 +221,7 @@ namespace llvm { /// the function, move the instructions before MBB terminator if it's legal. bool HoistCommonCodeInSuccs(MachineBasicBlock *MBB); }; -} -#endif /* LLVM_CODEGEN_BRANCHFOLDING_HPP */ +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_BRANCHFOLDING_H diff --git a/lib/CodeGen/BranchRelaxation.cpp b/lib/CodeGen/BranchRelaxation.cpp index 27ee12c4c5ff..0d87f142c7cc 100644 --- a/lib/CodeGen/BranchRelaxation.cpp +++ b/lib/CodeGen/BranchRelaxation.cpp @@ -1,4 +1,4 @@ -//===-- BranchRelaxation.cpp ----------------------------------------------===// +//===- BranchRelaxation.cpp -----------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -10,14 +10,25 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" 
+#include "llvm/IR/DebugLoc.h" +#include "llvm/Pass.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include <cassert> +#include <cstdint> +#include <iterator> +#include <memory> using namespace llvm; @@ -30,6 +41,7 @@ STATISTIC(NumUnconditionalRelaxed, "Number of unconditional branches relaxed"); #define BRANCH_RELAX_NAME "Branch relaxation pass" namespace { + class BranchRelaxation : public MachineFunctionPass { /// BasicBlockInfo - Information about the offset and size of a single /// basic block. @@ -38,16 +50,16 @@ class BranchRelaxation : public MachineFunctionPass { /// of this basic block. /// /// The offset is always aligned as required by the basic block. - unsigned Offset; + unsigned Offset = 0; /// Size - Size of the basic block in bytes. If the block contains /// inline assembly, this is a worst case estimate. /// /// The size does not include any alignment padding whether from the /// beginning of the block, or from an aligned jump table at the end. - unsigned Size; + unsigned Size = 0; - BasicBlockInfo() : Offset(0), Size(0) {} + BasicBlockInfo() = default; /// Compute the offset immediately following this block. \p MBB is the next /// block. 
@@ -95,18 +107,18 @@ class BranchRelaxation : public MachineFunctionPass { public: static char ID; - BranchRelaxation() : MachineFunctionPass(ID) { } + + BranchRelaxation() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; - StringRef getPassName() const override { - return BRANCH_RELAX_NAME; - } + StringRef getPassName() const override { return BRANCH_RELAX_NAME; } }; -} +} // end anonymous namespace char BranchRelaxation::ID = 0; + char &llvm::BranchRelaxationPassID = BranchRelaxation::ID; INITIALIZE_PASS(BranchRelaxation, DEBUG_TYPE, BRANCH_RELAX_NAME, false, false) @@ -131,7 +143,7 @@ void BranchRelaxation::verify() { LLVM_DUMP_METHOD void BranchRelaxation::dumpBBs() { for (auto &MBB : *MF) { const BasicBlockInfo &BBI = BlockInfo[MBB.getNumber()]; - dbgs() << format("BB#%u\toffset=%08x\t", MBB.getNumber(), BBI.Offset) + dbgs() << format("%bb.%u\toffset=%08x\t", MBB.getNumber(), BBI.Offset) << format("size=%#x\n", BBI.Size); } } @@ -196,7 +208,7 @@ void BranchRelaxation::adjustBlockOffsets(MachineBasicBlock &Start) { } } - /// Insert a new empty basic block and insert it after \BB +/// Insert a new empty basic block and insert it after \BB MachineBasicBlock *BranchRelaxation::createNewBlockAfter(MachineBasicBlock &BB) { // Create a new MBB for the code after the OrigBB. MachineBasicBlock *NewBB = @@ -233,7 +245,6 @@ MachineBasicBlock *BranchRelaxation::splitBlockBeforeInstr(MachineInstr &MI, // Insert an entry into BlockInfo to align it properly with the block numbers. BlockInfo.insert(BlockInfo.begin() + NewBB->getNumber(), BasicBlockInfo()); - NewBB->transferSuccessors(OrigBB); OrigBB->addSuccessor(NewBB); OrigBB->addSuccessor(DestBB); @@ -259,7 +270,7 @@ MachineBasicBlock *BranchRelaxation::splitBlockBeforeInstr(MachineInstr &MI, // Need to fix live-in lists if we track liveness. 
if (TRI->trackLivenessAfterRegAlloc(*MF)) - computeLiveIns(LiveRegs, MF->getRegInfo(), *NewBB); + computeAndAddLiveIns(LiveRegs, *NewBB); ++NumSplit; @@ -276,13 +287,10 @@ bool BranchRelaxation::isBlockInRange( if (TII->isBranchOffsetInRange(MI.getOpcode(), DestOffset - BrOffset)) return true; - DEBUG( - dbgs() << "Out of range branch to destination BB#" << DestBB.getNumber() - << " from BB#" << MI.getParent()->getNumber() - << " to " << DestOffset - << " offset " << DestOffset - BrOffset - << '\t' << MI - ); + DEBUG(dbgs() << "Out of range branch to destination " + << printMBBReference(DestBB) << " from " + << printMBBReference(*MI.getParent()) << " to " << DestOffset + << " offset " << DestOffset - BrOffset << '\t' << MI); return false; } @@ -348,16 +356,16 @@ bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) { // Need to fix live-in lists if we track liveness. if (TRI->trackLivenessAfterRegAlloc(*MF)) - computeLiveIns(LiveRegs, MF->getRegInfo(), NewBB); + computeAndAddLiveIns(LiveRegs, NewBB); } // We now have an appropriate fall-through block in place (either naturally or // just created), so we can invert the condition. MachineBasicBlock &NextBB = *std::next(MachineFunction::iterator(MBB)); - DEBUG(dbgs() << " Insert B to BB#" << TBB->getNumber() - << ", invert condition and change dest. to BB#" - << NextBB.getNumber() << '\n'); + DEBUG(dbgs() << " Insert B to " << printMBBReference(*TBB) + << ", invert condition and change dest. 
to " + << printMBBReference(NextBB) << '\n'); unsigned &MBBSize = BlockInfo[MBB->getNumber()].Size; diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 7f3c6da91268..07ba5d36cc96 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -4,7 +4,6 @@ add_llvm_library(LLVMCodeGen Analysis.cpp AtomicExpandPass.cpp BasicTargetTransformInfo.cpp - BranchCoalescing.cpp BranchFolding.cpp BranchRelaxation.cpp BuiltinGCs.cpp @@ -12,7 +11,6 @@ add_llvm_library(LLVMCodeGen CallingConvLower.cpp CodeGen.cpp CodeGenPrepare.cpp - CountingFunctionInserter.cpp CriticalAntiDepBreaker.cpp DeadMachineInstructionElim.cpp DetectDeadLanes.cpp @@ -22,6 +20,7 @@ add_llvm_library(LLVMCodeGen EdgeBundles.cpp ExecutionDepsFix.cpp ExpandISelPseudos.cpp + ExpandMemCmp.cpp ExpandPostRAPseudos.cpp ExpandReductions.cpp FaultMaps.cpp @@ -43,7 +42,7 @@ add_llvm_library(LLVMCodeGen LexicalScopes.cpp LiveDebugValues.cpp LiveDebugVariables.cpp - LiveIntervalAnalysis.cpp + LiveIntervals.cpp LiveInterval.cpp LiveIntervalUnion.cpp LivePhysRegs.cpp @@ -77,6 +76,7 @@ add_llvm_library(LLVMCodeGen MachineLoopInfo.cpp MachineModuleInfo.cpp MachineModuleInfoImpls.cpp + MachineOperand.cpp MachineOptimizationRemarkEmitter.cpp MachineOutliner.cpp MachinePassRegistry.cpp @@ -114,6 +114,7 @@ add_llvm_library(LLVMCodeGen RegisterPressure.cpp RegisterScavenging.cpp RenameIndependentSubregs.cpp + MIRCanonicalizerPass.cpp RegisterUsageInfo.cpp RegUsageInfoCollector.cpp RegUsageInfoPropagate.cpp diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index c2ced19458ed..b8920a601938 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -1,4 +1,4 @@ -//===------------------------ CalcSpillWeights.cpp ------------------------===// +//===- CalcSpillWeights.cpp -----------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -8,17 +8,23 @@ 
//===----------------------------------------------------------------------===// #include "llvm/CodeGen/CalcSpillWeights.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include <cassert> +#include <tuple> + using namespace llvm; #define DEBUG_TYPE "calcspillweights" @@ -64,13 +70,24 @@ static unsigned copyHint(const MachineInstr *mi, unsigned reg, return sub == hsub ? hreg : 0; const TargetRegisterClass *rc = mri.getRegClass(reg); + if (!tri.enableMultipleCopyHints()) { + // Only allow physreg hints in rc. + if (sub == 0) + return rc->contains(hreg) ? hreg : 0; + + // reg:sub should match the physreg hreg. + return tri.getMatchingSuperReg(hreg, sub, rc); + } + + unsigned CopiedPReg = (hsub ? tri.getSubReg(hreg, hsub) : hreg); + if (rc->contains(CopiedPReg)) + return CopiedPReg; - // Only allow physreg hints in rc. - if (sub == 0) - return rc->contains(hreg) ? hreg : 0; + // Check if reg:sub matches so that a super register could be hinted. + if (sub) + return tri.getMatchingSuperReg(CopiedPReg, sub, rc); - // reg:sub should match the physreg hreg. 
- return tri.getMatchingSuperReg(hreg, sub, rc); + return 0; } // Check if all values in LI are rematerializable @@ -127,8 +144,21 @@ static bool isRematerializable(const LiveInterval &LI, return true; } -void -VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { +void VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { + float weight = weightCalcHelper(li); + // Check if unspillable. + if (weight < 0) + return; + li.weight = weight; +} + +float VirtRegAuxInfo::futureWeight(LiveInterval &li, SlotIndex start, + SlotIndex end) { + return weightCalcHelper(li, &start, &end); +} + +float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start, + SlotIndex *end) { MachineRegisterInfo &mri = MF.getRegInfo(); const TargetRegisterInfo &tri = *MF.getSubtarget().getRegisterInfo(); MachineBasicBlock *mbb = nullptr; @@ -138,20 +168,73 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { unsigned numInstr = 0; // Number of instructions using li SmallPtrSet<MachineInstr*, 8> visited; - // Find the best physreg hint and the best virtreg hint. - float bestPhys = 0, bestVirt = 0; - unsigned hintPhys = 0, hintVirt = 0; - - // Don't recompute a target specific hint. - bool noHint = mri.getRegAllocationHint(li.reg).first != 0; + std::pair<unsigned, unsigned> TargetHint = mri.getRegAllocationHint(li.reg); // Don't recompute spill weight for an unspillable register. bool Spillable = li.isSpillable(); + bool localSplitArtifact = start && end; + + // Do not update future local split artifacts. + bool updateLI = !localSplitArtifact; + + if (localSplitArtifact) { + MachineBasicBlock *localMBB = LIS.getMBBFromIndex(*end); + assert(localMBB == LIS.getMBBFromIndex(*start) && + "start and end are expected to be in the same basic block"); + + // Local split artifact will have 2 additional copy instructions and they + // will be in the same BB. + // localLI = COPY other + // ... 
+ // other = COPY localLI + totalWeight += LiveIntervals::getSpillWeight(true, false, &MBFI, localMBB); + totalWeight += LiveIntervals::getSpillWeight(false, true, &MBFI, localMBB); + + numInstr += 2; + } + + // CopyHint is a sortable hint derived from a COPY instruction. + struct CopyHint { + unsigned Reg; + float Weight; + bool IsPhys; + unsigned HintOrder; + CopyHint(unsigned R, float W, bool P, unsigned HR) : + Reg(R), Weight(W), IsPhys(P), HintOrder(HR) {} + bool operator<(const CopyHint &rhs) const { + // Always prefer any physreg hint. + if (IsPhys != rhs.IsPhys) + return (IsPhys && !rhs.IsPhys); + if (Weight != rhs.Weight) + return (Weight > rhs.Weight); + + // This is just a temporary way to achive NFC for targets that don't + // enable multiple copy hints. HintOrder should be removed when all + // targets return true in enableMultipleCopyHints(). + return (HintOrder < rhs.HintOrder); + +#if 0 // Should replace the HintOrder check, see above. + // (just for the purpose of maintaining the set) + return Reg < rhs.Reg; +#endif + } + }; + std::set<CopyHint> CopyHints; + + // Temporary: see comment for HintOrder above. + unsigned CopyHintOrder = 0; for (MachineRegisterInfo::reg_instr_iterator I = mri.reg_instr_begin(li.reg), E = mri.reg_instr_end(); I != E; ) { MachineInstr *mi = &*(I++); + + // For local split artifacts, we are interested only in instructions between + // the expected start and end of the range. + SlotIndex si = LIS.getInstructionIndex(*mi); + if (localSplitArtifact && ((si < *start) || (si > *end))) + continue; + numInstr++; if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugValue()) continue; @@ -180,7 +263,8 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { } // Get allocation hints from copies. 
- if (noHint || !mi->isCopy()) + if (!mi->isCopy() || + (TargetHint.first != 0 && !tri.enableMultipleCopyHints())) continue; unsigned hint = copyHint(mi, li.reg, tri, mri); if (!hint) @@ -190,39 +274,43 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { // // FIXME: we probably shouldn't use floats at all. volatile float hweight = Hint[hint] += weight; - if (TargetRegisterInfo::isPhysicalRegister(hint)) { - if (hweight > bestPhys && mri.isAllocatable(hint)) { - bestPhys = hweight; - hintPhys = hint; - } - } else { - if (hweight > bestVirt) { - bestVirt = hweight; - hintVirt = hint; - } - } + if (TargetRegisterInfo::isVirtualRegister(hint) || mri.isAllocatable(hint)) + CopyHints.insert(CopyHint(hint, hweight, tri.isPhysicalRegister(hint), + (tri.enableMultipleCopyHints() ? hint : CopyHintOrder++))); } Hint.clear(); - // Always prefer the physreg hint. - if (unsigned hint = hintPhys ? hintPhys : hintVirt) { - mri.setRegAllocationHint(li.reg, 0, hint); + // Pass all the sorted copy hints to mri. + if (updateLI && CopyHints.size()) { + // Remove a generic hint if previously added by target. + if (TargetHint.first == 0 && TargetHint.second) + mri.clearSimpleHint(li.reg); + + for (auto &Hint : CopyHints) { + if (TargetHint.first != 0 && Hint.Reg == TargetHint.second) + // Don't add again the target-type hint. + continue; + mri.addRegAllocationHint(li.reg, Hint.Reg); + if (!tri.enableMultipleCopyHints()) + break; + } + // Weakly boost the spill weight of hinted registers. totalWeight *= 1.01F; } // If the live interval was already unspillable, leave it that way. if (!Spillable) - return; + return -1.0; // Mark li as unspillable if all live ranges are tiny and the interval // is not live at any reg mask. If the interval is live at a reg mask // spilling may be required. 
- if (li.isZeroLength(LIS.getSlotIndexes()) && + if (updateLI && li.isZeroLength(LIS.getSlotIndexes()) && !li.isLiveAtIndexes(LIS.getRegMaskSlots())) { li.markNotSpillable(); - return; + return -1.0; } // If all of the definitions of the interval are re-materializable, @@ -232,5 +320,7 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { if (isRematerializable(li, LIS, VRM, *MF.getSubtarget().getInstrInfo())) totalWeight *= 0.5F; - li.weight = normalize(totalWeight, li.getSize(), numInstr); + if (localSplitArtifact) + return normalize(totalWeight, start->distance(*end), numInstr); + return normalize(totalWeight, li.getSize(), numInstr); } diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp index 7cad4d031169..3593089b206d 100644 --- a/lib/CodeGen/CallingConvLower.cpp +++ b/lib/CodeGen/CallingConvLower.cpp @@ -15,14 +15,14 @@ #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/SaveAndRestore.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> using namespace llvm; diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index b7fd45a3f6a6..c0d7eb4cf47b 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -21,16 +21,15 @@ using namespace llvm; /// initializeCodeGen - Initialize all passes linked into the CodeGen library. 
void llvm::initializeCodeGen(PassRegistry &Registry) { initializeAtomicExpandPass(Registry); - initializeBranchCoalescingPass(Registry); initializeBranchFolderPassPass(Registry); initializeBranchRelaxationPass(Registry); initializeCodeGenPreparePass(Registry); - initializeCountingFunctionInserterPass(Registry); initializeDeadMachineInstructionElimPass(Registry); initializeDetectDeadLanesPass(Registry); initializeDwarfEHPreparePass(Registry); initializeEarlyIfConverterPass(Registry); initializeExpandISelPseudosPass(Registry); + initializeExpandMemCmpPassPass(Registry); initializeExpandPostRAPass(Registry); initializeFEntryInserterPass(Registry); initializeFinalizeMachineBundlesPass(Registry); @@ -78,7 +77,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializePreISelIntrinsicLoweringLegacyPassPass(Registry); initializeProcessImplicitDefsPass(Registry); initializeRABasicPass(Registry); - initializeRAFastPass(Registry); + initializeRegAllocFastPass(Registry); initializeRAGreedyPass(Registry); initializeRegisterCoalescerPass(Registry); initializeRenameIndependentSubregsPass(Registry); @@ -100,6 +99,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeVirtRegRewriterPass(Registry); initializeWinEHPreparePass(Registry); initializeXRayInstrumentationPass(Registry); + initializeMIRCanonicalizerPass(Registry); } void LLVMInitializeCodeGen(LLVMPassRegistryRef R) { diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index dc02a00e0fcc..c4794380f791 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -13,13 +13,17 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include 
"llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" -#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" @@ -28,38 +32,69 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/Analysis.h" -#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Statepoint.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" #include "llvm/IR/ValueMap.h" #include "llvm/Pass.h" +#include "llvm/Support/BlockFrequency.h" #include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Casting.h" 
#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/BypassSlowDivision.h" -#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SimplifyLibCalls.h" -#include "llvm/Transforms/Utils/ValueMapper.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <iterator> +#include <limits> +#include <memory> +#include <utility> +#include <vector> using namespace llvm; using namespace llvm::PatternMatch; @@ -75,6 +110,12 @@ STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses " "of sunken Casts"); STATISTIC(NumMemoryInsts, "Number of memory instructions whose address " "computations were sunk"); +STATISTIC(NumMemoryInstsPhiCreated, + "Number of phis created when address " + "computations were sunk to memory instructions"); +STATISTIC(NumMemoryInstsSelectCreated, + "Number of select created when address " + "computations were sunk to memory instructions"); STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads"); STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized"); STATISTIC(NumAndsAdded, @@ -85,12 +126,6 @@ STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved"); STATISTIC(NumSelectsExpanded, "Number of selects turned into branches"); STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed"); -STATISTIC(NumMemCmpCalls, "Number of memcmp calls"); -STATISTIC(NumMemCmpNotConstant, "Number of memcmp calls without constant size"); 
-STATISTIC(NumMemCmpGreaterThanMax, - "Number of memcmp calls with size greater than max size"); -STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls"); - static cl::opt<bool> DisableBranchOpts( "disable-cgp-branch-opts", cl::Hidden, cl::init(false), cl::desc("Disable branch optimizations in CodeGenPrepare")); @@ -151,25 +186,51 @@ EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden, cl::desc("Enable merging of redundant sexts when one is dominating" " the other."), cl::init(true)); -static cl::opt<unsigned> MemCmpNumLoadsPerBlock( - "memcmp-num-loads-per-block", cl::Hidden, cl::init(1), - cl::desc("The number of loads per basic block for inline expansion of " - "memcmp that is only being compared against zero.")); +static cl::opt<bool> DisableComplexAddrModes( + "disable-complex-addr-modes", cl::Hidden, cl::init(false), + cl::desc("Disables combining addressing modes with different parts " + "in optimizeMemoryInst.")); + +static cl::opt<bool> +AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false), + cl::desc("Allow creation of Phis in Address sinking.")); + +static cl::opt<bool> +AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(false), + cl::desc("Allow creation of selects in Address sinking.")); + +static cl::opt<bool> AddrSinkCombineBaseReg( + "addr-sink-combine-base-reg", cl::Hidden, cl::init(true), + cl::desc("Allow combining of BaseReg field in Address sinking.")); + +static cl::opt<bool> AddrSinkCombineBaseGV( + "addr-sink-combine-base-gv", cl::Hidden, cl::init(true), + cl::desc("Allow combining of BaseGV field in Address sinking.")); + +static cl::opt<bool> AddrSinkCombineBaseOffs( + "addr-sink-combine-base-offs", cl::Hidden, cl::init(true), + cl::desc("Allow combining of BaseOffs field in Address sinking.")); + +static cl::opt<bool> AddrSinkCombineScaledReg( + "addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true), + cl::desc("Allow combining of ScaledReg field in Address sinking.")); namespace { -typedef 
SmallPtrSet<Instruction *, 16> SetOfInstrs; -typedef PointerIntPair<Type *, 1, bool> TypeIsSExt; -typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy; -typedef SmallVector<Instruction *, 16> SExts; -typedef DenseMap<Value *, SExts> ValueToSExts; + +using SetOfInstrs = SmallPtrSet<Instruction *, 16>; +using TypeIsSExt = PointerIntPair<Type *, 1, bool>; +using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>; +using SExts = SmallVector<Instruction *, 16>; +using ValueToSExts = DenseMap<Value *, SExts>; + class TypePromotionTransaction; class CodeGenPrepare : public FunctionPass { - const TargetMachine *TM; + const TargetMachine *TM = nullptr; const TargetSubtargetInfo *SubtargetInfo; - const TargetLowering *TLI; + const TargetLowering *TLI = nullptr; const TargetRegisterInfo *TRI; - const TargetTransformInfo *TTI; + const TargetTransformInfo *TTI = nullptr; const TargetLibraryInfo *TLInfo; const LoopInfo *LI; std::unique_ptr<BlockFrequencyInfo> BFI; @@ -181,11 +242,14 @@ class TypePromotionTransaction; /// Keeps track of non-local addresses that have been sunk into a block. /// This allows us to avoid inserting duplicate code for blocks with - /// multiple load/stores of the same address. - ValueMap<Value*, Value*> SunkAddrs; + /// multiple load/stores of the same address. The usage of WeakTrackingVH + /// enables SunkAddrs to be treated as a cache whose entries can be + /// invalidated if a sunken address computation has been erased. + ValueMap<Value*, WeakTrackingVH> SunkAddrs; /// Keeps track of all instructions inserted for the current function. SetOfInstrs InsertedInsts; + /// Keeps track of the type of the related instruction before their /// promotion for the current function. InstrToOrigTy PromotedInsts; @@ -206,15 +270,15 @@ class TypePromotionTransaction; bool OptSize; /// DataLayout for the Function being processed. 
- const DataLayout *DL; + const DataLayout *DL = nullptr; public: static char ID; // Pass identification, replacement for typeid - CodeGenPrepare() - : FunctionPass(ID), TM(nullptr), TLI(nullptr), TTI(nullptr), - DL(nullptr) { + + CodeGenPrepare() : FunctionPass(ID) { initializeCodeGenPreparePass(*PassRegistry::getPassRegistry()); } + bool runOnFunction(Function &F) override; StringRef getPassName() const override { return "CodeGen Prepare"; } @@ -264,11 +328,12 @@ class TypePromotionTransaction; SmallVectorImpl<Instruction *> &SpeculativelyMovedExts); bool splitBranchCondition(Function &F); bool simplifyOffsetableRelocate(Instruction &I); - bool splitIndirectCriticalEdges(Function &F); }; -} + +} // end anonymous namespace char CodeGenPrepare::ID = 0; + INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE, "Optimize for code generation", false, false) INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) @@ -302,9 +367,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) { LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); OptSize = F.optForSize(); + ProfileSummaryInfo *PSI = + getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); if (ProfileGuidedSectionPrefix) { - ProfileSummaryInfo *PSI = - getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); if (PSI->isFunctionHotInCallGraph(&F)) F.setSectionPrefix(".hot"); else if (PSI->isFunctionColdInCallGraph(&F)) @@ -313,7 +378,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) { /// This optimization identifies DIV instructions that can be /// profitably bypassed and carried out with a shorter, faster divide. 
- if (!OptSize && TLI && TLI->isSlowDivBypassed()) { + if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI && + TLI->isSlowDivBypassed()) { const DenseMap<unsigned int, unsigned int> &BypassWidths = TLI->getBypassSlowDivWidths(); BasicBlock* BB = &*F.begin(); @@ -340,7 +406,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { // Split some critical edges where one of the sources is an indirect branch, // to help generate sane code for PHIs involving such edges. - EverMadeChange |= splitIndirectCriticalEdges(F); + EverMadeChange |= SplitIndirectBrCriticalEdges(F); bool MadeChange = true; while (MadeChange) { @@ -485,160 +551,6 @@ BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) { return DestBB; } -// Return the unique indirectbr predecessor of a block. This may return null -// even if such a predecessor exists, if it's not useful for splitting. -// If a predecessor is found, OtherPreds will contain all other (non-indirectbr) -// predecessors of BB. -static BasicBlock * -findIBRPredecessor(BasicBlock *BB, SmallVectorImpl<BasicBlock *> &OtherPreds) { - // If the block doesn't have any PHIs, we don't care about it, since there's - // no point in splitting it. - PHINode *PN = dyn_cast<PHINode>(BB->begin()); - if (!PN) - return nullptr; - - // Verify we have exactly one IBR predecessor. - // Conservatively bail out if one of the other predecessors is not a "regular" - // terminator (that is, not a switch or a br). 
- BasicBlock *IBB = nullptr; - for (unsigned Pred = 0, E = PN->getNumIncomingValues(); Pred != E; ++Pred) { - BasicBlock *PredBB = PN->getIncomingBlock(Pred); - TerminatorInst *PredTerm = PredBB->getTerminator(); - switch (PredTerm->getOpcode()) { - case Instruction::IndirectBr: - if (IBB) - return nullptr; - IBB = PredBB; - break; - case Instruction::Br: - case Instruction::Switch: - OtherPreds.push_back(PredBB); - continue; - default: - return nullptr; - } - } - - return IBB; -} - -// Split critical edges where the source of the edge is an indirectbr -// instruction. This isn't always possible, but we can handle some easy cases. -// This is useful because MI is unable to split such critical edges, -// which means it will not be able to sink instructions along those edges. -// This is especially painful for indirect branches with many successors, where -// we end up having to prepare all outgoing values in the origin block. -// -// Our normal algorithm for splitting critical edges requires us to update -// the outgoing edges of the edge origin block, but for an indirectbr this -// is hard, since it would require finding and updating the block addresses -// the indirect branch uses. But if a block only has a single indirectbr -// predecessor, with the others being regular branches, we can do it in a -// different way. -// Say we have A -> D, B -> D, I -> D where only I -> D is an indirectbr. -// We can split D into D0 and D1, where D0 contains only the PHIs from D, -// and D1 is the D block body. We can then duplicate D0 as D0A and D0B, and -// create the following structure: -// A -> D0A, B -> D0A, I -> D0B, D0A -> D1, D0B -> D1 -bool CodeGenPrepare::splitIndirectCriticalEdges(Function &F) { - // Check whether the function has any indirectbrs, and collect which blocks - // they may jump to. Since most functions don't have indirect branches, - // this lowers the common case's overhead to O(Blocks) instead of O(Edges). 
- SmallSetVector<BasicBlock *, 16> Targets; - for (auto &BB : F) { - auto *IBI = dyn_cast<IndirectBrInst>(BB.getTerminator()); - if (!IBI) - continue; - - for (unsigned Succ = 0, E = IBI->getNumSuccessors(); Succ != E; ++Succ) - Targets.insert(IBI->getSuccessor(Succ)); - } - - if (Targets.empty()) - return false; - - bool Changed = false; - for (BasicBlock *Target : Targets) { - SmallVector<BasicBlock *, 16> OtherPreds; - BasicBlock *IBRPred = findIBRPredecessor(Target, OtherPreds); - // If we did not found an indirectbr, or the indirectbr is the only - // incoming edge, this isn't the kind of edge we're looking for. - if (!IBRPred || OtherPreds.empty()) - continue; - - // Don't even think about ehpads/landingpads. - Instruction *FirstNonPHI = Target->getFirstNonPHI(); - if (FirstNonPHI->isEHPad() || Target->isLandingPad()) - continue; - - BasicBlock *BodyBlock = Target->splitBasicBlock(FirstNonPHI, ".split"); - // It's possible Target was its own successor through an indirectbr. - // In this case, the indirectbr now comes from BodyBlock. - if (IBRPred == Target) - IBRPred = BodyBlock; - - // At this point Target only has PHIs, and BodyBlock has the rest of the - // block's body. Create a copy of Target that will be used by the "direct" - // preds. - ValueToValueMapTy VMap; - BasicBlock *DirectSucc = CloneBasicBlock(Target, VMap, ".clone", &F); - - for (BasicBlock *Pred : OtherPreds) { - // If the target is a loop to itself, then the terminator of the split - // block needs to be updated. - if (Pred == Target) - BodyBlock->getTerminator()->replaceUsesOfWith(Target, DirectSucc); - else - Pred->getTerminator()->replaceUsesOfWith(Target, DirectSucc); - } - - // Ok, now fix up the PHIs. We know the two blocks only have PHIs, and that - // they are clones, so the number of PHIs are the same. - // (a) Remove the edge coming from IBRPred from the "Direct" PHI - // (b) Leave that as the only edge in the "Indirect" PHI. - // (c) Merge the two in the body block. 
- BasicBlock::iterator Indirect = Target->begin(), - End = Target->getFirstNonPHI()->getIterator(); - BasicBlock::iterator Direct = DirectSucc->begin(); - BasicBlock::iterator MergeInsert = BodyBlock->getFirstInsertionPt(); - - assert(&*End == Target->getTerminator() && - "Block was expected to only contain PHIs"); - - while (Indirect != End) { - PHINode *DirPHI = cast<PHINode>(Direct); - PHINode *IndPHI = cast<PHINode>(Indirect); - - // Now, clean up - the direct block shouldn't get the indirect value, - // and vice versa. - DirPHI->removeIncomingValue(IBRPred); - Direct++; - - // Advance the pointer here, to avoid invalidation issues when the old - // PHI is erased. - Indirect++; - - PHINode *NewIndPHI = PHINode::Create(IndPHI->getType(), 1, "ind", IndPHI); - NewIndPHI->addIncoming(IndPHI->getIncomingValueForBlock(IBRPred), - IBRPred); - - // Create a PHI in the body block, to merge the direct and indirect - // predecessors. - PHINode *MergePHI = - PHINode::Create(IndPHI->getType(), 2, "merge", &*MergeInsert); - MergePHI->addIncoming(NewIndPHI, Target); - MergePHI->addIncoming(DirPHI, DirectSucc); - - IndPHI->replaceAllUsesWith(MergePHI); - IndPHI->eraseFromParent(); - } - - Changed = true; - } - - return Changed; -} - /// Eliminate blocks that contain only PHI nodes, debug info directives, and an /// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split /// edges in ways that are non-optimal for isel. Start by eliminating these @@ -827,7 +739,6 @@ bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB, return true; } - /// Eliminate a basic block that has only phi's and an unconditional branch in /// it. void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) { @@ -948,6 +859,21 @@ static bool simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase, const SmallVectorImpl<GCRelocateInst *> &Targets) { bool MadeChange = false; + // We must ensure the relocation of derived pointer is defined after + // relocation of base pointer. 
If we find a relocation corresponding to base + // defined earlier than relocation of base then we move relocation of base + // right before found relocation. We consider only relocation in the same + // basic block as relocation of base. Relocations from other basic block will + // be skipped by optimization and we do not care about them. + for (auto R = RelocatedBase->getParent()->getFirstInsertionPt(); + &*R != RelocatedBase; ++R) + if (auto RI = dyn_cast<GCRelocateInst>(R)) + if (RI->getStatepoint() == RelocatedBase->getStatepoint()) + if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) { + RelocatedBase->moveBefore(RI); + break; + } + for (GCRelocateInst *ToReplace : Targets) { assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() && "Not relocating a derived object of the original base object"); @@ -1125,6 +1051,7 @@ static bool SinkCast(CastInst *CI) { // If we removed all uses, nuke the cast. if (CI->use_empty()) { + salvageDebugInfo(*CI); CI->eraseFromParent(); MadeChange = true; } @@ -1137,7 +1064,6 @@ static bool SinkCast(CastInst *CI) { /// reduce the number of virtual registers that must be created and coalesced. /// /// Return true if any changes are made. -/// static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, const DataLayout &DL) { // Sink only "cheap" (or nop) address-space casts. This is a weaker condition @@ -1641,656 +1567,6 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros, return true; } -// This class provides helper functions to expand a memcmp library call into an -// inline expansion. 
-class MemCmpExpansion { - struct ResultBlock { - BasicBlock *BB; - PHINode *PhiSrc1; - PHINode *PhiSrc2; - ResultBlock(); - }; - - CallInst *CI; - ResultBlock ResBlock; - unsigned MaxLoadSize; - unsigned NumBlocks; - unsigned NumBlocksNonOneByte; - unsigned NumLoadsPerBlock; - std::vector<BasicBlock *> LoadCmpBlocks; - BasicBlock *EndBlock; - PHINode *PhiRes; - bool IsUsedForZeroCmp; - const DataLayout &DL; - IRBuilder<> Builder; - - unsigned calculateNumBlocks(unsigned Size); - void createLoadCmpBlocks(); - void createResultBlock(); - void setupResultBlockPHINodes(); - void setupEndBlockPHINodes(); - void emitLoadCompareBlock(unsigned Index, unsigned LoadSize, - unsigned GEPIndex); - Value *getCompareLoadPairs(unsigned Index, unsigned Size, - unsigned &NumBytesProcessed); - void emitLoadCompareBlockMultipleLoads(unsigned Index, unsigned Size, - unsigned &NumBytesProcessed); - void emitLoadCompareByteBlock(unsigned Index, unsigned GEPIndex); - void emitMemCmpResultBlock(); - Value *getMemCmpExpansionZeroCase(unsigned Size); - Value *getMemCmpEqZeroOneBlock(unsigned Size); - Value *getMemCmpOneBlock(unsigned Size); - unsigned getLoadSize(unsigned Size); - unsigned getNumLoads(unsigned Size); - -public: - MemCmpExpansion(CallInst *CI, uint64_t Size, unsigned MaxLoadSize, - unsigned NumLoadsPerBlock, const DataLayout &DL); - Value *getMemCmpExpansion(uint64_t Size); -}; - -MemCmpExpansion::ResultBlock::ResultBlock() - : BB(nullptr), PhiSrc1(nullptr), PhiSrc2(nullptr) {} - -// Initialize the basic block structure required for expansion of memcmp call -// with given maximum load size and memcmp size parameter. -// This structure includes: -// 1. A list of load compare blocks - LoadCmpBlocks. -// 2. An EndBlock, split from original instruction point, which is the block to -// return from. -// 3. ResultBlock, block to branch to for early exit when a -// LoadCmpBlock finds a difference. 
-MemCmpExpansion::MemCmpExpansion(CallInst *CI, uint64_t Size, - unsigned MaxLoadSize, unsigned LoadsPerBlock, - const DataLayout &TheDataLayout) - : CI(CI), MaxLoadSize(MaxLoadSize), NumLoadsPerBlock(LoadsPerBlock), - DL(TheDataLayout), Builder(CI) { - - // A memcmp with zero-comparison with only one block of load and compare does - // not need to set up any extra blocks. This case could be handled in the DAG, - // but since we have all of the machinery to flexibly expand any memcpy here, - // we choose to handle this case too to avoid fragmented lowering. - IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI); - NumBlocks = calculateNumBlocks(Size); - if ((!IsUsedForZeroCmp && NumLoadsPerBlock != 1) || NumBlocks != 1) { - BasicBlock *StartBlock = CI->getParent(); - EndBlock = StartBlock->splitBasicBlock(CI, "endblock"); - setupEndBlockPHINodes(); - createResultBlock(); - - // If return value of memcmp is not used in a zero equality, we need to - // calculate which source was larger. The calculation requires the - // two loaded source values of each load compare block. - // These will be saved in the phi nodes created by setupResultBlockPHINodes. - if (!IsUsedForZeroCmp) - setupResultBlockPHINodes(); - - // Create the number of required load compare basic blocks. - createLoadCmpBlocks(); - - // Update the terminator added by splitBasicBlock to branch to the first - // LoadCmpBlock. 
- StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]); - } - - Builder.SetCurrentDebugLocation(CI->getDebugLoc()); -} - -void MemCmpExpansion::createLoadCmpBlocks() { - for (unsigned i = 0; i < NumBlocks; i++) { - BasicBlock *BB = BasicBlock::Create(CI->getContext(), "loadbb", - EndBlock->getParent(), EndBlock); - LoadCmpBlocks.push_back(BB); - } -} - -void MemCmpExpansion::createResultBlock() { - ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block", - EndBlock->getParent(), EndBlock); -} - -// This function creates the IR instructions for loading and comparing 1 byte. -// It loads 1 byte from each source of the memcmp parameters with the given -// GEPIndex. It then subtracts the two loaded values and adds this result to the -// final phi node for selecting the memcmp result. -void MemCmpExpansion::emitLoadCompareByteBlock(unsigned Index, - unsigned GEPIndex) { - Value *Source1 = CI->getArgOperand(0); - Value *Source2 = CI->getArgOperand(1); - - Builder.SetInsertPoint(LoadCmpBlocks[Index]); - Type *LoadSizeType = Type::getInt8Ty(CI->getContext()); - // Cast source to LoadSizeType*. - if (Source1->getType() != LoadSizeType) - Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); - if (Source2->getType() != LoadSizeType) - Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); - - // Get the base address using the GEPIndex. 
- if (GEPIndex != 0) { - Source1 = Builder.CreateGEP(LoadSizeType, Source1, - ConstantInt::get(LoadSizeType, GEPIndex)); - Source2 = Builder.CreateGEP(LoadSizeType, Source2, - ConstantInt::get(LoadSizeType, GEPIndex)); - } - - Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); - Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); - - LoadSrc1 = Builder.CreateZExt(LoadSrc1, Type::getInt32Ty(CI->getContext())); - LoadSrc2 = Builder.CreateZExt(LoadSrc2, Type::getInt32Ty(CI->getContext())); - Value *Diff = Builder.CreateSub(LoadSrc1, LoadSrc2); - - PhiRes->addIncoming(Diff, LoadCmpBlocks[Index]); - - if (Index < (LoadCmpBlocks.size() - 1)) { - // Early exit branch if difference found to EndBlock. Otherwise, continue to - // next LoadCmpBlock, - Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff, - ConstantInt::get(Diff->getType(), 0)); - BranchInst *CmpBr = - BranchInst::Create(EndBlock, LoadCmpBlocks[Index + 1], Cmp); - Builder.Insert(CmpBr); - } else { - // The last block has an unconditional branch to EndBlock. - BranchInst *CmpBr = BranchInst::Create(EndBlock); - Builder.Insert(CmpBr); - } -} - -unsigned MemCmpExpansion::getNumLoads(unsigned Size) { - return (Size / MaxLoadSize) + countPopulation(Size % MaxLoadSize); -} - -unsigned MemCmpExpansion::getLoadSize(unsigned Size) { - return MinAlign(PowerOf2Floor(Size), MaxLoadSize); -} - -/// Generate an equality comparison for one or more pairs of loaded values. -/// This is used in the case where the memcmp() call is compared equal or not -/// equal to zero. -Value *MemCmpExpansion::getCompareLoadPairs(unsigned Index, unsigned Size, - unsigned &NumBytesProcessed) { - std::vector<Value *> XorList, OrList; - Value *Diff; - - unsigned RemainingBytes = Size - NumBytesProcessed; - unsigned NumLoadsRemaining = getNumLoads(RemainingBytes); - unsigned NumLoads = std::min(NumLoadsRemaining, NumLoadsPerBlock); - - // For a single-block expansion, start inserting before the memcmp call. 
- if (LoadCmpBlocks.empty()) - Builder.SetInsertPoint(CI); - else - Builder.SetInsertPoint(LoadCmpBlocks[Index]); - - Value *Cmp = nullptr; - for (unsigned i = 0; i < NumLoads; ++i) { - unsigned LoadSize = getLoadSize(RemainingBytes); - unsigned GEPIndex = NumBytesProcessed / LoadSize; - NumBytesProcessed += LoadSize; - RemainingBytes -= LoadSize; - - Type *LoadSizeType = IntegerType::get(CI->getContext(), LoadSize * 8); - Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); - assert(LoadSize <= MaxLoadSize && "Unexpected load type"); - - Value *Source1 = CI->getArgOperand(0); - Value *Source2 = CI->getArgOperand(1); - - // Cast source to LoadSizeType*. - if (Source1->getType() != LoadSizeType) - Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); - if (Source2->getType() != LoadSizeType) - Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); - - // Get the base address using the GEPIndex. - if (GEPIndex != 0) { - Source1 = Builder.CreateGEP(LoadSizeType, Source1, - ConstantInt::get(LoadSizeType, GEPIndex)); - Source2 = Builder.CreateGEP(LoadSizeType, Source2, - ConstantInt::get(LoadSizeType, GEPIndex)); - } - - // Get a constant or load a value for each source address. - Value *LoadSrc1 = nullptr; - if (auto *Source1C = dyn_cast<Constant>(Source1)) - LoadSrc1 = ConstantFoldLoadFromConstPtr(Source1C, LoadSizeType, DL); - if (!LoadSrc1) - LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); - - Value *LoadSrc2 = nullptr; - if (auto *Source2C = dyn_cast<Constant>(Source2)) - LoadSrc2 = ConstantFoldLoadFromConstPtr(Source2C, LoadSizeType, DL); - if (!LoadSrc2) - LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); - - if (NumLoads != 1) { - if (LoadSizeType != MaxLoadType) { - LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType); - LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType); - } - // If we have multiple loads per block, we need to generate a composite - // comparison using xor+or. 
- Diff = Builder.CreateXor(LoadSrc1, LoadSrc2); - Diff = Builder.CreateZExt(Diff, MaxLoadType); - XorList.push_back(Diff); - } else { - // If there's only one load per block, we just compare the loaded values. - Cmp = Builder.CreateICmpNE(LoadSrc1, LoadSrc2); - } - } - - auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> { - std::vector<Value *> OutList; - for (unsigned i = 0; i < InList.size() - 1; i = i + 2) { - Value *Or = Builder.CreateOr(InList[i], InList[i + 1]); - OutList.push_back(Or); - } - if (InList.size() % 2 != 0) - OutList.push_back(InList.back()); - return OutList; - }; - - if (!Cmp) { - // Pairwise OR the XOR results. - OrList = pairWiseOr(XorList); - - // Pairwise OR the OR results until one result left. - while (OrList.size() != 1) { - OrList = pairWiseOr(OrList); - } - Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0)); - } - - return Cmp; -} - -void MemCmpExpansion::emitLoadCompareBlockMultipleLoads( - unsigned Index, unsigned Size, unsigned &NumBytesProcessed) { - Value *Cmp = getCompareLoadPairs(Index, Size, NumBytesProcessed); - - BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1)) - ? EndBlock - : LoadCmpBlocks[Index + 1]; - // Early exit branch if difference found to ResultBlock. Otherwise, - // continue to next LoadCmpBlock or EndBlock. - BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp); - Builder.Insert(CmpBr); - - // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0 - // since early exit to ResultBlock was not taken (no difference was found in - // any of the bytes). - if (Index == LoadCmpBlocks.size() - 1) { - Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0); - PhiRes->addIncoming(Zero, LoadCmpBlocks[Index]); - } -} - -// This function creates the IR intructions for loading and comparing using the -// given LoadSize. It loads the number of bytes specified by LoadSize from each -// source of the memcmp parameters. 
It then does a subtract to see if there was -// a difference in the loaded values. If a difference is found, it branches -// with an early exit to the ResultBlock for calculating which source was -// larger. Otherwise, it falls through to the either the next LoadCmpBlock or -// the EndBlock if this is the last LoadCmpBlock. Loading 1 byte is handled with -// a special case through emitLoadCompareByteBlock. The special handling can -// simply subtract the loaded values and add it to the result phi node. -void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, unsigned LoadSize, - unsigned GEPIndex) { - if (LoadSize == 1) { - MemCmpExpansion::emitLoadCompareByteBlock(Index, GEPIndex); - return; - } - - Type *LoadSizeType = IntegerType::get(CI->getContext(), LoadSize * 8); - Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); - assert(LoadSize <= MaxLoadSize && "Unexpected load type"); - - Value *Source1 = CI->getArgOperand(0); - Value *Source2 = CI->getArgOperand(1); - - Builder.SetInsertPoint(LoadCmpBlocks[Index]); - // Cast source to LoadSizeType*. - if (Source1->getType() != LoadSizeType) - Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); - if (Source2->getType() != LoadSizeType) - Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); - - // Get the base address using the GEPIndex. - if (GEPIndex != 0) { - Source1 = Builder.CreateGEP(LoadSizeType, Source1, - ConstantInt::get(LoadSizeType, GEPIndex)); - Source2 = Builder.CreateGEP(LoadSizeType, Source2, - ConstantInt::get(LoadSizeType, GEPIndex)); - } - - // Load LoadSizeType from the base address. 
- Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); - Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); - - if (DL.isLittleEndian()) { - Function *Bswap = Intrinsic::getDeclaration(CI->getModule(), - Intrinsic::bswap, LoadSizeType); - LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1); - LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2); - } - - if (LoadSizeType != MaxLoadType) { - LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType); - LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType); - } - - // Add the loaded values to the phi nodes for calculating memcmp result only - // if result is not used in a zero equality. - if (!IsUsedForZeroCmp) { - ResBlock.PhiSrc1->addIncoming(LoadSrc1, LoadCmpBlocks[Index]); - ResBlock.PhiSrc2->addIncoming(LoadSrc2, LoadCmpBlocks[Index]); - } - - Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, LoadSrc1, LoadSrc2); - BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1)) - ? EndBlock - : LoadCmpBlocks[Index + 1]; - // Early exit branch if difference found to ResultBlock. Otherwise, continue - // to next LoadCmpBlock or EndBlock. - BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp); - Builder.Insert(CmpBr); - - // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0 - // since early exit to ResultBlock was not taken (no difference was found in - // any of the bytes). - if (Index == LoadCmpBlocks.size() - 1) { - Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0); - PhiRes->addIncoming(Zero, LoadCmpBlocks[Index]); - } -} - -// This function populates the ResultBlock with a sequence to calculate the -// memcmp result. It compares the two loaded source values and returns -1 if -// src1 < src2 and 1 if src1 > src2. -void MemCmpExpansion::emitMemCmpResultBlock() { - // Special case: if memcmp result is used in a zero equality, result does not - // need to be calculated and can simply return 1. 
- if (IsUsedForZeroCmp) { - BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt(); - Builder.SetInsertPoint(ResBlock.BB, InsertPt); - Value *Res = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 1); - PhiRes->addIncoming(Res, ResBlock.BB); - BranchInst *NewBr = BranchInst::Create(EndBlock); - Builder.Insert(NewBr); - return; - } - BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt(); - Builder.SetInsertPoint(ResBlock.BB, InsertPt); - - Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, ResBlock.PhiSrc1, - ResBlock.PhiSrc2); - - Value *Res = - Builder.CreateSelect(Cmp, ConstantInt::get(Builder.getInt32Ty(), -1), - ConstantInt::get(Builder.getInt32Ty(), 1)); - - BranchInst *NewBr = BranchInst::Create(EndBlock); - Builder.Insert(NewBr); - PhiRes->addIncoming(Res, ResBlock.BB); -} - -unsigned MemCmpExpansion::calculateNumBlocks(unsigned Size) { - unsigned NumBlocks = 0; - bool HaveOneByteLoad = false; - unsigned RemainingSize = Size; - unsigned LoadSize = MaxLoadSize; - while (RemainingSize) { - if (LoadSize == 1) - HaveOneByteLoad = true; - NumBlocks += RemainingSize / LoadSize; - RemainingSize = RemainingSize % LoadSize; - LoadSize = LoadSize / 2; - } - NumBlocksNonOneByte = HaveOneByteLoad ? (NumBlocks - 1) : NumBlocks; - - if (IsUsedForZeroCmp) - NumBlocks = NumBlocks / NumLoadsPerBlock + - (NumBlocks % NumLoadsPerBlock != 0 ? 
1 : 0); - - return NumBlocks; -} - -void MemCmpExpansion::setupResultBlockPHINodes() { - Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); - Builder.SetInsertPoint(ResBlock.BB); - ResBlock.PhiSrc1 = - Builder.CreatePHI(MaxLoadType, NumBlocksNonOneByte, "phi.src1"); - ResBlock.PhiSrc2 = - Builder.CreatePHI(MaxLoadType, NumBlocksNonOneByte, "phi.src2"); -} - -void MemCmpExpansion::setupEndBlockPHINodes() { - Builder.SetInsertPoint(&EndBlock->front()); - PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res"); -} - -Value *MemCmpExpansion::getMemCmpExpansionZeroCase(unsigned Size) { - unsigned NumBytesProcessed = 0; - // This loop populates each of the LoadCmpBlocks with the IR sequence to - // handle multiple loads per block. - for (unsigned i = 0; i < NumBlocks; ++i) - emitLoadCompareBlockMultipleLoads(i, Size, NumBytesProcessed); - - emitMemCmpResultBlock(); - return PhiRes; -} - -/// A memcmp expansion that compares equality with 0 and only has one block of -/// load and compare can bypass the compare, branch, and phi IR that is required -/// in the general case. -Value *MemCmpExpansion::getMemCmpEqZeroOneBlock(unsigned Size) { - unsigned NumBytesProcessed = 0; - Value *Cmp = getCompareLoadPairs(0, Size, NumBytesProcessed); - return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext())); -} - -/// A memcmp expansion that only has one block of load and compare can bypass -/// the compare, branch, and phi IR that is required in the general case. -Value *MemCmpExpansion::getMemCmpOneBlock(unsigned Size) { - assert(NumLoadsPerBlock == 1 && "Only handles one load pair per block"); - - Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8); - Value *Source1 = CI->getArgOperand(0); - Value *Source2 = CI->getArgOperand(1); - - // Cast source to LoadSizeType*. 
- if (Source1->getType() != LoadSizeType) - Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); - if (Source2->getType() != LoadSizeType) - Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); - - // Load LoadSizeType from the base address. - Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); - Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); - - if (DL.isLittleEndian() && Size != 1) { - Function *Bswap = Intrinsic::getDeclaration(CI->getModule(), - Intrinsic::bswap, LoadSizeType); - LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1); - LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2); - } - - // TODO: Instead of comparing ULT, just subtract and return the difference? - Value *CmpNE = Builder.CreateICmpNE(LoadSrc1, LoadSrc2); - Value *CmpULT = Builder.CreateICmpULT(LoadSrc1, LoadSrc2); - Type *I32 = Builder.getInt32Ty(); - Value *Sel1 = Builder.CreateSelect(CmpULT, ConstantInt::get(I32, -1), - ConstantInt::get(I32, 1)); - return Builder.CreateSelect(CmpNE, Sel1, ConstantInt::get(I32, 0)); -} - -// This function expands the memcmp call into an inline expansion and returns -// the memcmp result. -Value *MemCmpExpansion::getMemCmpExpansion(uint64_t Size) { - if (IsUsedForZeroCmp) - return NumBlocks == 1 ? getMemCmpEqZeroOneBlock(Size) : - getMemCmpExpansionZeroCase(Size); - - // TODO: Handle more than one load pair per block in getMemCmpOneBlock(). - if (NumBlocks == 1 && NumLoadsPerBlock == 1) - return getMemCmpOneBlock(Size); - - // This loop calls emitLoadCompareBlock for comparing Size bytes of the two - // memcmp sources. It starts with loading using the maximum load size set by - // the target. It processes any remaining bytes using a load size which is the - // next smallest power of 2. - unsigned LoadSize = MaxLoadSize; - unsigned NumBytesToBeProcessed = Size; - unsigned Index = 0; - while (NumBytesToBeProcessed) { - // Calculate how many blocks we can create with the current load size. 
- unsigned NumBlocks = NumBytesToBeProcessed / LoadSize; - unsigned GEPIndex = (Size - NumBytesToBeProcessed) / LoadSize; - NumBytesToBeProcessed = NumBytesToBeProcessed % LoadSize; - - // For each NumBlocks, populate the instruction sequence for loading and - // comparing LoadSize bytes. - while (NumBlocks--) { - emitLoadCompareBlock(Index, LoadSize, GEPIndex); - Index++; - GEPIndex++; - } - // Get the next LoadSize to use. - LoadSize = LoadSize / 2; - } - - emitMemCmpResultBlock(); - return PhiRes; -} - -// This function checks to see if an expansion of memcmp can be generated. -// It checks for constant compare size that is less than the max inline size. -// If an expansion cannot occur, returns false to leave as a library call. -// Otherwise, the library call is replaced with a new IR instruction sequence. -/// We want to transform: -/// %call = call signext i32 @memcmp(i8* %0, i8* %1, i64 15) -/// To: -/// loadbb: -/// %0 = bitcast i32* %buffer2 to i8* -/// %1 = bitcast i32* %buffer1 to i8* -/// %2 = bitcast i8* %1 to i64* -/// %3 = bitcast i8* %0 to i64* -/// %4 = load i64, i64* %2 -/// %5 = load i64, i64* %3 -/// %6 = call i64 @llvm.bswap.i64(i64 %4) -/// %7 = call i64 @llvm.bswap.i64(i64 %5) -/// %8 = sub i64 %6, %7 -/// %9 = icmp ne i64 %8, 0 -/// br i1 %9, label %res_block, label %loadbb1 -/// res_block: ; preds = %loadbb2, -/// %loadbb1, %loadbb -/// %phi.src1 = phi i64 [ %6, %loadbb ], [ %22, %loadbb1 ], [ %36, %loadbb2 ] -/// %phi.src2 = phi i64 [ %7, %loadbb ], [ %23, %loadbb1 ], [ %37, %loadbb2 ] -/// %10 = icmp ult i64 %phi.src1, %phi.src2 -/// %11 = select i1 %10, i32 -1, i32 1 -/// br label %endblock -/// loadbb1: ; preds = %loadbb -/// %12 = bitcast i32* %buffer2 to i8* -/// %13 = bitcast i32* %buffer1 to i8* -/// %14 = bitcast i8* %13 to i32* -/// %15 = bitcast i8* %12 to i32* -/// %16 = getelementptr i32, i32* %14, i32 2 -/// %17 = getelementptr i32, i32* %15, i32 2 -/// %18 = load i32, i32* %16 -/// %19 = load i32, i32* %17 -/// %20 = call i32 
@llvm.bswap.i32(i32 %18) -/// %21 = call i32 @llvm.bswap.i32(i32 %19) -/// %22 = zext i32 %20 to i64 -/// %23 = zext i32 %21 to i64 -/// %24 = sub i64 %22, %23 -/// %25 = icmp ne i64 %24, 0 -/// br i1 %25, label %res_block, label %loadbb2 -/// loadbb2: ; preds = %loadbb1 -/// %26 = bitcast i32* %buffer2 to i8* -/// %27 = bitcast i32* %buffer1 to i8* -/// %28 = bitcast i8* %27 to i16* -/// %29 = bitcast i8* %26 to i16* -/// %30 = getelementptr i16, i16* %28, i16 6 -/// %31 = getelementptr i16, i16* %29, i16 6 -/// %32 = load i16, i16* %30 -/// %33 = load i16, i16* %31 -/// %34 = call i16 @llvm.bswap.i16(i16 %32) -/// %35 = call i16 @llvm.bswap.i16(i16 %33) -/// %36 = zext i16 %34 to i64 -/// %37 = zext i16 %35 to i64 -/// %38 = sub i64 %36, %37 -/// %39 = icmp ne i64 %38, 0 -/// br i1 %39, label %res_block, label %loadbb3 -/// loadbb3: ; preds = %loadbb2 -/// %40 = bitcast i32* %buffer2 to i8* -/// %41 = bitcast i32* %buffer1 to i8* -/// %42 = getelementptr i8, i8* %41, i8 14 -/// %43 = getelementptr i8, i8* %40, i8 14 -/// %44 = load i8, i8* %42 -/// %45 = load i8, i8* %43 -/// %46 = zext i8 %44 to i32 -/// %47 = zext i8 %45 to i32 -/// %48 = sub i32 %46, %47 -/// br label %endblock -/// endblock: ; preds = %res_block, -/// %loadbb3 -/// %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ] -/// ret i32 %phi.res -static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI, - const TargetLowering *TLI, const DataLayout *DL) { - NumMemCmpCalls++; - - // TTI call to check if target would like to expand memcmp. Also, get the - // MaxLoadSize. - unsigned MaxLoadSize; - if (!TTI->expandMemCmp(CI, MaxLoadSize)) - return false; - - // Early exit from expansion if -Oz. - if (CI->getFunction()->optForMinSize()) - return false; - - // Early exit from expansion if size is not a constant. 
- ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2)); - if (!SizeCast) { - NumMemCmpNotConstant++; - return false; - } - - // Early exit from expansion if size greater than max bytes to load. - uint64_t SizeVal = SizeCast->getZExtValue(); - unsigned NumLoads = 0; - unsigned RemainingSize = SizeVal; - unsigned LoadSize = MaxLoadSize; - while (RemainingSize) { - NumLoads += RemainingSize / LoadSize; - RemainingSize = RemainingSize % LoadSize; - LoadSize = LoadSize / 2; - } - - if (NumLoads > TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize())) { - NumMemCmpGreaterThanMax++; - return false; - } - - NumMemCmpInlined++; - - // MemCmpHelper object creates and sets up basic blocks required for - // expanding memcmp with size SizeVal. - unsigned NumLoadsPerBlock = MemCmpNumLoadsPerBlock; - MemCmpExpansion MemCmpHelper(CI, SizeVal, MaxLoadSize, NumLoadsPerBlock, *DL); - - Value *Res = MemCmpHelper.getMemCmpExpansion(SizeVal); - - // Replace call with result of expansion and erase call. - CI->replaceAllUsesWith(Res); - CI->eraseFromParent(); - - return true; -} - bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { BasicBlock *BB = CI->getParent(); @@ -2443,12 +1719,6 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { return true; } - LibFunc Func; - if (TLInfo->getLibFunc(ImmutableCallSite(CI), Func) && - Func == LibFunc_memcmp && expandMemCmp(CI, TTI, TLI, DL)) { - ModifiedDT = true; - return true; - } return false; } @@ -2599,19 +1869,125 @@ namespace { /// This is an extended version of TargetLowering::AddrMode /// which holds actual Value*'s for register values. 
struct ExtAddrMode : public TargetLowering::AddrMode { - Value *BaseReg; - Value *ScaledReg; - ExtAddrMode() : BaseReg(nullptr), ScaledReg(nullptr) {} + Value *BaseReg = nullptr; + Value *ScaledReg = nullptr; + Value *OriginalValue = nullptr; + + enum FieldName { + NoField = 0x00, + BaseRegField = 0x01, + BaseGVField = 0x02, + BaseOffsField = 0x04, + ScaledRegField = 0x08, + ScaleField = 0x10, + MultipleFields = 0xff + }; + + ExtAddrMode() = default; + void print(raw_ostream &OS) const; void dump() const; - bool operator==(const ExtAddrMode& O) const { - return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) && - (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) && - (HasBaseReg == O.HasBaseReg) && (Scale == O.Scale); + FieldName compare(const ExtAddrMode &other) { + // First check that the types are the same on each field, as differing types + // is something we can't cope with later on. + if (BaseReg && other.BaseReg && + BaseReg->getType() != other.BaseReg->getType()) + return MultipleFields; + if (BaseGV && other.BaseGV && + BaseGV->getType() != other.BaseGV->getType()) + return MultipleFields; + if (ScaledReg && other.ScaledReg && + ScaledReg->getType() != other.ScaledReg->getType()) + return MultipleFields; + + // Check each field to see if it differs. + unsigned Result = NoField; + if (BaseReg != other.BaseReg) + Result |= BaseRegField; + if (BaseGV != other.BaseGV) + Result |= BaseGVField; + if (BaseOffs != other.BaseOffs) + Result |= BaseOffsField; + if (ScaledReg != other.ScaledReg) + Result |= ScaledRegField; + // Don't count 0 as being a different scale, because that actually means + // unscaled (which will already be counted by having no ScaledReg). + if (Scale && other.Scale && Scale != other.Scale) + Result |= ScaleField; + + if (countPopulation(Result) > 1) + return MultipleFields; + else + return static_cast<FieldName>(Result); + } + + // An AddrMode is trivial if it involves no calculation i.e. it is just a base + // with no offset. 
+ bool isTrivial() { + // An AddrMode is (BaseGV + BaseReg + BaseOffs + ScaleReg * Scale) so it is + // trivial if at most one of these terms is nonzero, except that BaseGV and + // BaseReg both being zero actually means a null pointer value, which we + // consider to be 'non-zero' here. + return !BaseOffs && !Scale && !(BaseGV && BaseReg); + } + + Value *GetFieldAsValue(FieldName Field, Type *IntPtrTy) { + switch (Field) { + default: + return nullptr; + case BaseRegField: + return BaseReg; + case BaseGVField: + return BaseGV; + case ScaledRegField: + return ScaledReg; + case BaseOffsField: + return ConstantInt::get(IntPtrTy, BaseOffs); + } + } + + void SetCombinedField(FieldName Field, Value *V, + const SmallVectorImpl<ExtAddrMode> &AddrModes) { + switch (Field) { + default: + llvm_unreachable("Unhandled fields are expected to be rejected earlier"); + break; + case ExtAddrMode::BaseRegField: + BaseReg = V; + break; + case ExtAddrMode::BaseGVField: + // A combined BaseGV is an Instruction, not a GlobalValue, so it goes + // in the BaseReg field. + assert(BaseReg == nullptr); + BaseReg = V; + BaseGV = nullptr; + break; + case ExtAddrMode::ScaledRegField: + ScaledReg = V; + // If we have a mix of scaled and unscaled addrmodes then we want scale + // to be the scale and not zero. + if (!Scale) + for (const ExtAddrMode &AM : AddrModes) + if (AM.Scale) { + Scale = AM.Scale; + break; + } + break; + case ExtAddrMode::BaseOffsField: + // The offset is no longer a constant, so it goes in ScaledReg with a + // scale of 1. 
+ assert(ScaledReg == nullptr); + ScaledReg = V; + Scale = 1; + BaseOffs = 0; + break; + } } }; +} // end anonymous namespace + #ifndef NDEBUG static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) { AM.print(OS); @@ -2619,6 +1995,7 @@ static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) { } #endif +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void ExtAddrMode::print(raw_ostream &OS) const { bool NeedPlus = false; OS << "["; @@ -2650,18 +2027,18 @@ void ExtAddrMode::print(raw_ostream &OS) const { OS << ']'; } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void ExtAddrMode::dump() const { print(dbgs()); dbgs() << '\n'; } #endif +namespace { + /// \brief This class provides transaction based operation on the IR. /// Every change made through this class is recorded in the internal state and /// can be undone (rollback) until commit is called. class TypePromotionTransaction { - /// \brief This represents the common interface of the individual transaction. /// Each class implements the logic for doing one specific modification on /// the IR via the TypePromotionTransaction. @@ -2675,7 +2052,7 @@ class TypePromotionTransaction { /// The constructor performs the related action on the IR. TypePromotionAction(Instruction *Inst) : Inst(Inst) {} - virtual ~TypePromotionAction() {} + virtual ~TypePromotionAction() = default; /// \brief Undo the modification done by this action. /// When this method is called, the IR must be in the same state as it was @@ -2702,6 +2079,7 @@ class TypePromotionTransaction { Instruction *PrevInst; BasicBlock *BB; } Point; + /// Remember whether or not the instruction had a previous instruction. bool HasPrevInstruction; @@ -2756,6 +2134,7 @@ class TypePromotionTransaction { class OperandSetter : public TypePromotionAction { /// Original operand of the instruction. Value *Origin; + /// Index of the modified instruction. 
unsigned Idx; @@ -2813,6 +2192,7 @@ class TypePromotionTransaction { /// \brief Build a truncate instruction. class TruncBuilder : public TypePromotionAction { Value *Val; + public: /// \brief Build a truncate instruction of \p Opnd producing a \p Ty /// result. @@ -2837,6 +2217,7 @@ class TypePromotionTransaction { /// \brief Build a sign extension instruction. class SExtBuilder : public TypePromotionAction { Value *Val; + public: /// \brief Build a sign extension instruction of \p Opnd producing a \p Ty /// result. @@ -2862,6 +2243,7 @@ class TypePromotionTransaction { /// \brief Build a zero extension instruction. class ZExtBuilder : public TypePromotionAction { Value *Val; + public: /// \brief Build a zero extension instruction of \p Opnd producing a \p Ty /// result. @@ -2912,15 +2294,18 @@ class TypePromotionTransaction { struct InstructionAndIdx { /// The instruction using the instruction. Instruction *Inst; + /// The index where this instruction is used for Inst. unsigned Idx; + InstructionAndIdx(Instruction *Inst, unsigned Idx) : Inst(Inst), Idx(Idx) {} }; /// Keep track of the original uses (pair Instruction, Index). SmallVector<InstructionAndIdx, 4> OriginalUses; - typedef SmallVectorImpl<InstructionAndIdx>::iterator use_iterator; + + using use_iterator = SmallVectorImpl<InstructionAndIdx>::iterator; public: /// \brief Replace all the use of \p Inst by \p New. @@ -2951,11 +2336,14 @@ class TypePromotionTransaction { class InstructionRemover : public TypePromotionAction { /// Original position of the instruction. InsertionHandler Inserter; + /// Helper structure to hide all the link to the instruction. In other /// words, this helps to do as if the instruction was removed. OperandsHider Hider; + /// Keep track of the uses replaced, if any. - UsesReplacer *Replacer; + UsesReplacer *Replacer = nullptr; + /// Keep track of instructions removed. 
SetOfInstrs &RemovedInsts; @@ -2967,7 +2355,7 @@ class TypePromotionTransaction { InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts, Value *New = nullptr) : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst), - Replacer(nullptr), RemovedInsts(RemovedInsts) { + RemovedInsts(RemovedInsts) { if (New) Replacer = new UsesReplacer(Inst, New); DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n"); @@ -2996,15 +2384,17 @@ public: /// Restoration point. /// The restoration point is a pointer to an action instead of an iterator /// because the iterator may be invalidated but not the pointer. - typedef const TypePromotionAction *ConstRestorationPt; + using ConstRestorationPt = const TypePromotionAction *; TypePromotionTransaction(SetOfInstrs &RemovedInsts) : RemovedInsts(RemovedInsts) {} /// Advocate every changes made in that transaction. void commit(); + /// Undo all the changes made after the given point. void rollback(ConstRestorationPt Point); + /// Get the current restoration point. ConstRestorationPt getRestorationPoint() const; @@ -3012,18 +2402,25 @@ public: /// @{ /// Same as Instruction::setOperand. void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal); + /// Same as Instruction::eraseFromParent. void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr); + /// Same as Value::replaceAllUsesWith. void replaceAllUsesWith(Instruction *Inst, Value *New); + /// Same as Value::mutateType. void mutateType(Instruction *Inst, Type *NewTy); + /// Same as IRBuilder::createTrunc. Value *createTrunc(Instruction *Opnd, Type *Ty); + /// Same as IRBuilder::createSExt. Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty); + /// Same as IRBuilder::createZExt. Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty); + /// Same as Instruction::moveBefore. void moveBefore(Instruction *Inst, Instruction *Before); /// @} @@ -3031,30 +2428,36 @@ public: private: /// The ordered list of actions made so far. 
SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions; - typedef SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator CommitPt; + + using CommitPt = SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator; + SetOfInstrs &RemovedInsts; }; +} // end anonymous namespace + void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx, Value *NewVal) { - Actions.push_back( - make_unique<TypePromotionTransaction::OperandSetter>(Inst, Idx, NewVal)); + Actions.push_back(llvm::make_unique<TypePromotionTransaction::OperandSetter>( + Inst, Idx, NewVal)); } void TypePromotionTransaction::eraseInstruction(Instruction *Inst, Value *NewVal) { Actions.push_back( - make_unique<TypePromotionTransaction::InstructionRemover>(Inst, - RemovedInsts, NewVal)); + llvm::make_unique<TypePromotionTransaction::InstructionRemover>( + Inst, RemovedInsts, NewVal)); } void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst, Value *New) { - Actions.push_back(make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New)); + Actions.push_back( + llvm::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New)); } void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) { - Actions.push_back(make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy)); + Actions.push_back( + llvm::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy)); } Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, @@ -3084,7 +2487,8 @@ Value *TypePromotionTransaction::createZExt(Instruction *Inst, void TypePromotionTransaction::moveBefore(Instruction *Inst, Instruction *Before) { Actions.push_back( - make_unique<TypePromotionTransaction::InstructionMoveBefore>(Inst, Before)); + llvm::make_unique<TypePromotionTransaction::InstructionMoveBefore>( + Inst, Before)); } TypePromotionTransaction::ConstRestorationPt @@ -3107,6 +2511,8 @@ void TypePromotionTransaction::rollback( } } +namespace { + /// \brief A helper class for matching 
addressing modes. /// /// This encapsulates the logic for matching the target-legal addressing modes. @@ -3128,8 +2534,10 @@ class AddressingModeMatcher { /// The instructions inserted by other CodeGenPrepare optimizations. const SetOfInstrs &InsertedInsts; + /// A map from the instructions to their type before promotion. InstrToOrigTy &PromotedInsts; + /// The ongoing transaction where every action should be registered. TypePromotionTransaction &TPT; @@ -3151,8 +2559,8 @@ class AddressingModeMatcher { PromotedInsts(PromotedInsts), TPT(TPT) { IgnoreProfitability = false; } -public: +public: /// Find the maximal addressing mode that a load/store of V can fold, /// give an access type of AccessTy. This returns a list of involved /// instructions in AddrModeInsts. @@ -3177,6 +2585,7 @@ public: (void)Success; assert(Success && "Couldn't select *anything*?"); return Result; } + private: bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth); bool matchAddr(Value *V, unsigned Depth); @@ -3190,6 +2599,520 @@ private: Value *PromotedOperand) const; }; +/// \brief Keep track of simplification of Phi nodes. +/// Accept the set of all phi nodes and erase phi node from this set +/// if it is simplified. 
+class SimplificationTracker { + DenseMap<Value *, Value *> Storage; + const SimplifyQuery &SQ; + SmallPtrSetImpl<PHINode *> &AllPhiNodes; + SmallPtrSetImpl<SelectInst *> &AllSelectNodes; + +public: + SimplificationTracker(const SimplifyQuery &sq, + SmallPtrSetImpl<PHINode *> &APN, + SmallPtrSetImpl<SelectInst *> &ASN) + : SQ(sq), AllPhiNodes(APN), AllSelectNodes(ASN) {} + + Value *Get(Value *V) { + do { + auto SV = Storage.find(V); + if (SV == Storage.end()) + return V; + V = SV->second; + } while (true); + } + + Value *Simplify(Value *Val) { + SmallVector<Value *, 32> WorkList; + SmallPtrSet<Value *, 32> Visited; + WorkList.push_back(Val); + while (!WorkList.empty()) { + auto P = WorkList.pop_back_val(); + if (!Visited.insert(P).second) + continue; + if (auto *PI = dyn_cast<Instruction>(P)) + if (Value *V = SimplifyInstruction(cast<Instruction>(PI), SQ)) { + for (auto *U : PI->users()) + WorkList.push_back(cast<Value>(U)); + Put(PI, V); + PI->replaceAllUsesWith(V); + if (auto *PHI = dyn_cast<PHINode>(PI)) + AllPhiNodes.erase(PHI); + if (auto *Select = dyn_cast<SelectInst>(PI)) + AllSelectNodes.erase(Select); + PI->eraseFromParent(); + } + } + return Get(Val); + } + + void Put(Value *From, Value *To) { + Storage.insert({ From, To }); + } +}; + +/// \brief A helper class for combining addressing modes. +class AddressingModeCombiner { + typedef std::pair<Value *, BasicBlock *> ValueInBB; + typedef DenseMap<ValueInBB, Value *> FoldAddrToValueMapping; + typedef std::pair<PHINode *, PHINode *> PHIPair; + +private: + /// The addressing modes we've collected. + SmallVector<ExtAddrMode, 16> AddrModes; + + /// The field in which the AddrModes differ, when we have more than one. + ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField; + + /// Are the AddrModes that we have all just equal to their original values? + bool AllAddrModesTrivial = true; + + /// Common Type for all different fields in addressing modes. 
+ Type *CommonType; + + /// SimplifyQuery for simplifyInstruction utility. + const SimplifyQuery &SQ; + + /// Original Address. + ValueInBB Original; + +public: + AddressingModeCombiner(const SimplifyQuery &_SQ, ValueInBB OriginalValue) + : CommonType(nullptr), SQ(_SQ), Original(OriginalValue) {} + + /// \brief Get the combined AddrMode + const ExtAddrMode &getAddrMode() const { + return AddrModes[0]; + } + + /// \brief Add a new AddrMode if it's compatible with the AddrModes we already + /// have. + /// \return True iff we succeeded in doing so. + bool addNewAddrMode(ExtAddrMode &NewAddrMode) { + // Take note of if we have any non-trivial AddrModes, as we need to detect + // when all AddrModes are trivial as then we would introduce a phi or select + // which just duplicates what's already there. + AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial(); + + // If this is the first addrmode then everything is fine. + if (AddrModes.empty()) { + AddrModes.emplace_back(NewAddrMode); + return true; + } + + // Figure out how different this is from the other address modes, which we + // can do just by comparing against the first one given that we only care + // about the cumulative difference. + ExtAddrMode::FieldName ThisDifferentField = + AddrModes[0].compare(NewAddrMode); + if (DifferentField == ExtAddrMode::NoField) + DifferentField = ThisDifferentField; + else if (DifferentField != ThisDifferentField) + DifferentField = ExtAddrMode::MultipleFields; + + // If NewAddrMode differs in only one dimension, and that dimension isn't + // the amount that ScaledReg is scaled by, then we can handle it by + // inserting a phi/select later on. Even if NewAddMode is the same + // we still need to collect it due to original value is different. + // And later we will need all original values as anchors during + // finding the common Phi node. 
+ if (DifferentField != ExtAddrMode::MultipleFields && + DifferentField != ExtAddrMode::ScaleField) { + AddrModes.emplace_back(NewAddrMode); + return true; + } + + // We couldn't combine NewAddrMode with the rest, so return failure. + AddrModes.clear(); + return false; + } + + /// \brief Combine the addressing modes we've collected into a single + /// addressing mode. + /// \return True iff we successfully combined them or we only had one so + /// didn't need to combine them anyway. + bool combineAddrModes() { + // If we have no AddrModes then they can't be combined. + if (AddrModes.size() == 0) + return false; + + // A single AddrMode can trivially be combined. + if (AddrModes.size() == 1 || DifferentField == ExtAddrMode::NoField) + return true; + + // If the AddrModes we collected are all just equal to the value they are + // derived from then combining them wouldn't do anything useful. + if (AllAddrModesTrivial) + return false; + + if (!addrModeCombiningAllowed()) + return false; + + // Build a map between <original value, basic block where we saw it> to + // value of base register. + // Bail out if there is no common type. + FoldAddrToValueMapping Map; + if (!initializeMap(Map)) + return false; + + Value *CommonValue = findCommon(Map); + if (CommonValue) + AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes); + return CommonValue != nullptr; + } + +private: + /// \brief Initialize Map with anchor values. For address seen in some BB + /// we set the value of different field saw in this address. + /// If address is not an instruction than basic block is set to null. + /// At the same time we find a common type for different field we will + /// use to create new Phi/Select nodes. Keep it in CommonType field. + /// Return false if there is no common type found. + bool initializeMap(FoldAddrToValueMapping &Map) { + // Keep track of keys where the value is null. We will need to replace it + // with constant null when we know the common type. 
+ SmallVector<ValueInBB, 2> NullValue; + Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes[0].OriginalValue->getType()); + for (auto &AM : AddrModes) { + BasicBlock *BB = nullptr; + if (Instruction *I = dyn_cast<Instruction>(AM.OriginalValue)) + BB = I->getParent(); + + Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy); + if (DV) { + auto *Type = DV->getType(); + if (CommonType && CommonType != Type) + return false; + CommonType = Type; + Map[{ AM.OriginalValue, BB }] = DV; + } else { + NullValue.push_back({ AM.OriginalValue, BB }); + } + } + assert(CommonType && "At least one non-null value must be!"); + for (auto VIBB : NullValue) + Map[VIBB] = Constant::getNullValue(CommonType); + return true; + } + + /// \brief We have mapping between value A and basic block where value A + /// seen to other value B where B was a field in addressing mode represented + /// by A. Also we have an original value C representin an address in some + /// basic block. Traversing from C through phi and selects we ended up with + /// A's in a map. This utility function tries to find a value V which is a + /// field in addressing mode C and traversing through phi nodes and selects + /// we will end up in corresponded values B in a map. + /// The utility will create a new Phi/Selects if needed. + // The simple example looks as follows: + // BB1: + // p1 = b1 + 40 + // br cond BB2, BB3 + // BB2: + // p2 = b2 + 40 + // br BB3 + // BB3: + // p = phi [p1, BB1], [p2, BB2] + // v = load p + // Map is + // <p1, BB1> -> b1 + // <p2, BB2> -> b2 + // Request is + // <p, BB3> -> ? + // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3 + Value *findCommon(FoldAddrToValueMapping &Map) { + // Tracks of new created Phi nodes. + SmallPtrSet<PHINode *, 32> NewPhiNodes; + // Tracks of new created Select nodes. + SmallPtrSet<SelectInst *, 32> NewSelectNodes; + // Tracks the simplification of new created phi nodes. 
The reason we use + // this mapping is because we will add new created Phi nodes in AddrToBase. + // Simplification of Phi nodes is recursive, so some Phi node may + // be simplified after we added it to AddrToBase. + // Using this mapping we can find the current value in AddrToBase. + SimplificationTracker ST(SQ, NewPhiNodes, NewSelectNodes); + + // First step, DFS to create PHI nodes for all intermediate blocks. + // Also fill traverse order for the second step. + SmallVector<ValueInBB, 32> TraverseOrder; + InsertPlaceholders(Map, TraverseOrder, NewPhiNodes, NewSelectNodes); + + // Second Step, fill new nodes by merged values and simplify if possible. + FillPlaceholders(Map, TraverseOrder, ST); + + if (!AddrSinkNewSelects && NewSelectNodes.size() > 0) { + DestroyNodes(NewPhiNodes); + DestroyNodes(NewSelectNodes); + return nullptr; + } + + // Now we'd like to match New Phi nodes to existed ones. + unsigned PhiNotMatchedCount = 0; + if (!MatchPhiSet(NewPhiNodes, ST, AddrSinkNewPhis, PhiNotMatchedCount)) { + DestroyNodes(NewPhiNodes); + DestroyNodes(NewSelectNodes); + return nullptr; + } + + auto *Result = ST.Get(Map.find(Original)->second); + if (Result) { + NumMemoryInstsPhiCreated += NewPhiNodes.size() + PhiNotMatchedCount; + NumMemoryInstsSelectCreated += NewSelectNodes.size(); + } + return Result; + } + + /// \brief Destroy nodes from a set. + template <typename T> void DestroyNodes(SmallPtrSetImpl<T *> &Instructions) { + // For safe erasing, replace the Phi with dummy value first. + auto Dummy = UndefValue::get(CommonType); + for (auto I : Instructions) { + I->replaceAllUsesWith(Dummy); + I->eraseFromParent(); + } + } + + /// \brief Try to match PHI node to Candidate. + /// Matcher tracks the matched Phi nodes. 
+ bool MatchPhiNode(PHINode *PHI, PHINode *Candidate, + DenseSet<PHIPair> &Matcher, + SmallPtrSetImpl<PHINode *> &PhiNodesToMatch) { + SmallVector<PHIPair, 8> WorkList; + Matcher.insert({ PHI, Candidate }); + WorkList.push_back({ PHI, Candidate }); + SmallSet<PHIPair, 8> Visited; + while (!WorkList.empty()) { + auto Item = WorkList.pop_back_val(); + if (!Visited.insert(Item).second) + continue; + // We iterate over all incoming values to Phi to compare them. + // If values are different and both of them Phi and the first one is a + // Phi we added (subject to match) and both of them is in the same basic + // block then we can match our pair if values match. So we state that + // these values match and add it to work list to verify that. + for (auto B : Item.first->blocks()) { + Value *FirstValue = Item.first->getIncomingValueForBlock(B); + Value *SecondValue = Item.second->getIncomingValueForBlock(B); + if (FirstValue == SecondValue) + continue; + + PHINode *FirstPhi = dyn_cast<PHINode>(FirstValue); + PHINode *SecondPhi = dyn_cast<PHINode>(SecondValue); + + // One of them is not Phi or + // The first one is not Phi node from the set we'd like to match or + // Phi nodes from different basic blocks then + // we will not be able to match. + if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) || + FirstPhi->getParent() != SecondPhi->getParent()) + return false; + + // If we already matched them then continue. + if (Matcher.count({ FirstPhi, SecondPhi })) + continue; + // So the values are different and does not match. So we need them to + // match. + Matcher.insert({ FirstPhi, SecondPhi }); + // But me must check it. + WorkList.push_back({ FirstPhi, SecondPhi }); + } + } + return true; + } + + /// \brief For the given set of PHI nodes try to find their equivalents. + /// Returns false if this matching fails and creation of new Phi is disabled. 
+ bool MatchPhiSet(SmallPtrSetImpl<PHINode *> &PhiNodesToMatch, + SimplificationTracker &ST, bool AllowNewPhiNodes, + unsigned &PhiNotMatchedCount) { + DenseSet<PHIPair> Matched; + SmallPtrSet<PHINode *, 8> WillNotMatch; + while (PhiNodesToMatch.size()) { + PHINode *PHI = *PhiNodesToMatch.begin(); + + // Add us, if no Phi nodes in the basic block we do not match. + WillNotMatch.clear(); + WillNotMatch.insert(PHI); + + // Traverse all Phis until we found equivalent or fail to do that. + bool IsMatched = false; + for (auto &P : PHI->getParent()->phis()) { + if (&P == PHI) + continue; + if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch))) + break; + // If it does not match, collect all Phi nodes from matcher. + // if we end up with no match, them all these Phi nodes will not match + // later. + for (auto M : Matched) + WillNotMatch.insert(M.first); + Matched.clear(); + } + if (IsMatched) { + // Replace all matched values and erase them. + for (auto MV : Matched) { + MV.first->replaceAllUsesWith(MV.second); + PhiNodesToMatch.erase(MV.first); + ST.Put(MV.first, MV.second); + MV.first->eraseFromParent(); + } + Matched.clear(); + continue; + } + // If we are not allowed to create new nodes then bail out. + if (!AllowNewPhiNodes) + return false; + // Just remove all seen values in matcher. They will not match anything. + PhiNotMatchedCount += WillNotMatch.size(); + for (auto *P : WillNotMatch) + PhiNodesToMatch.erase(P); + } + return true; + } + /// \brief Fill the placeholder with values from predecessors and simplify it. 
+ void FillPlaceholders(FoldAddrToValueMapping &Map, + SmallVectorImpl<ValueInBB> &TraverseOrder, + SimplificationTracker &ST) { + while (!TraverseOrder.empty()) { + auto Current = TraverseOrder.pop_back_val(); + assert(Map.find(Current) != Map.end() && "No node to fill!!!"); + Value *CurrentValue = Current.first; + BasicBlock *CurrentBlock = Current.second; + Value *V = Map[Current]; + + if (SelectInst *Select = dyn_cast<SelectInst>(V)) { + // CurrentValue also must be Select. + auto *CurrentSelect = cast<SelectInst>(CurrentValue); + auto *TrueValue = CurrentSelect->getTrueValue(); + ValueInBB TrueItem = { TrueValue, isa<Instruction>(TrueValue) + ? CurrentBlock + : nullptr }; + assert(Map.find(TrueItem) != Map.end() && "No True Value!"); + Select->setTrueValue(ST.Get(Map[TrueItem])); + auto *FalseValue = CurrentSelect->getFalseValue(); + ValueInBB FalseItem = { FalseValue, isa<Instruction>(FalseValue) + ? CurrentBlock + : nullptr }; + assert(Map.find(FalseItem) != Map.end() && "No False Value!"); + Select->setFalseValue(ST.Get(Map[FalseItem])); + } else { + // Must be a Phi node then. + PHINode *PHI = cast<PHINode>(V); + // Fill the Phi node with values from predecessors. + bool IsDefinedInThisBB = + cast<Instruction>(CurrentValue)->getParent() == CurrentBlock; + auto *CurrentPhi = dyn_cast<PHINode>(CurrentValue); + for (auto B : predecessors(CurrentBlock)) { + Value *PV = IsDefinedInThisBB + ? CurrentPhi->getIncomingValueForBlock(B) + : CurrentValue; + ValueInBB item = { PV, isa<Instruction>(PV) ? B : nullptr }; + assert(Map.find(item) != Map.end() && "No predecessor Value!"); + PHI->addIncoming(ST.Get(Map[item]), B); + } + } + // Simplify if possible. + Map[Current] = ST.Simplify(V); + } + } + + /// Starting from value recursively iterates over predecessors up to known + /// ending values represented in a map. For each traversed block inserts + /// a placeholder Phi or Select. + /// Reports all new created Phi/Select nodes by adding them to set. 
+ /// Also reports and order in what basic blocks have been traversed. + void InsertPlaceholders(FoldAddrToValueMapping &Map, + SmallVectorImpl<ValueInBB> &TraverseOrder, + SmallPtrSetImpl<PHINode *> &NewPhiNodes, + SmallPtrSetImpl<SelectInst *> &NewSelectNodes) { + SmallVector<ValueInBB, 32> Worklist; + assert((isa<PHINode>(Original.first) || isa<SelectInst>(Original.first)) && + "Address must be a Phi or Select node"); + auto *Dummy = UndefValue::get(CommonType); + Worklist.push_back(Original); + while (!Worklist.empty()) { + auto Current = Worklist.pop_back_val(); + // If value is not an instruction it is something global, constant, + // parameter and we can say that this value is observable in any block. + // Set block to null to denote it. + // Also please take into account that it is how we build anchors. + if (!isa<Instruction>(Current.first)) + Current.second = nullptr; + // if it is already visited or it is an ending value then skip it. + if (Map.find(Current) != Map.end()) + continue; + TraverseOrder.push_back(Current); + + Value *CurrentValue = Current.first; + BasicBlock *CurrentBlock = Current.second; + // CurrentValue must be a Phi node or select. All others must be covered + // by anchors. + Instruction *CurrentI = cast<Instruction>(CurrentValue); + bool IsDefinedInThisBB = CurrentI->getParent() == CurrentBlock; + + unsigned PredCount = + std::distance(pred_begin(CurrentBlock), pred_end(CurrentBlock)); + // if Current Value is not defined in this basic block we are interested + // in values in predecessors. + if (!IsDefinedInThisBB) { + assert(PredCount && "Unreachable block?!"); + PHINode *PHI = PHINode::Create(CommonType, PredCount, "sunk_phi", + &CurrentBlock->front()); + Map[Current] = PHI; + NewPhiNodes.insert(PHI); + // Add all predecessors in work list. + for (auto B : predecessors(CurrentBlock)) + Worklist.push_back({ CurrentValue, B }); + continue; + } + // Value is defined in this basic block. 
+ if (SelectInst *OrigSelect = dyn_cast<SelectInst>(CurrentI)) { + // Is it OK to get metadata from OrigSelect?! + // Create a Select placeholder with dummy value. + SelectInst *Select = + SelectInst::Create(OrigSelect->getCondition(), Dummy, Dummy, + OrigSelect->getName(), OrigSelect, OrigSelect); + Map[Current] = Select; + NewSelectNodes.insert(Select); + // We are interested in True and False value in this basic block. + Worklist.push_back({ OrigSelect->getTrueValue(), CurrentBlock }); + Worklist.push_back({ OrigSelect->getFalseValue(), CurrentBlock }); + } else { + // It must be a Phi node then. + auto *CurrentPhi = cast<PHINode>(CurrentI); + // Create new Phi node for merge of bases. + assert(PredCount && "Unreachable block?!"); + PHINode *PHI = PHINode::Create(CommonType, PredCount, "sunk_phi", + &CurrentBlock->front()); + Map[Current] = PHI; + NewPhiNodes.insert(PHI); + + // Add all predecessors in work list. + for (auto B : predecessors(CurrentBlock)) + Worklist.push_back({ CurrentPhi->getIncomingValueForBlock(B), B }); + } + } + } + + bool addrModeCombiningAllowed() { + if (DisableComplexAddrModes) + return false; + switch (DifferentField) { + default: + return false; + case ExtAddrMode::BaseRegField: + return AddrSinkCombineBaseReg; + case ExtAddrMode::BaseGVField: + return AddrSinkCombineBaseGV; + case ExtAddrMode::BaseOffsField: + return AddrSinkCombineBaseOffs; + case ExtAddrMode::ScaledRegField: + return AddrSinkCombineScaledReg; + } + } +}; +} // end anonymous namespace + /// Try adding ScaleReg*Scale to the current addressing mode. /// Return true and update AddrMode if this addr mode is legal for the target, /// false if not. @@ -3294,6 +3217,8 @@ static bool isPromotedInstructionLegal(const TargetLowering &TLI, ISDOpcode, TLI.getValueType(DL, PromotedInst->getType())); } +namespace { + /// \brief Hepler class to perform type promotion. 
class TypePromotionHelper { /// \brief Utility function to check whether or not a sign or zero extension @@ -3370,12 +3295,13 @@ class TypePromotionHelper { public: /// Type for the utility function that promotes the operand of Ext. - typedef Value *(*Action)(Instruction *Ext, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, - unsigned &CreatedInstsCost, - SmallVectorImpl<Instruction *> *Exts, - SmallVectorImpl<Instruction *> *Truncs, - const TargetLowering &TLI); + using Action = Value *(*)(Instruction *Ext, TypePromotionTransaction &TPT, + InstrToOrigTy &PromotedInsts, + unsigned &CreatedInstsCost, + SmallVectorImpl<Instruction *> *Exts, + SmallVectorImpl<Instruction *> *Truncs, + const TargetLowering &TLI); + /// \brief Given a sign/zero extend instruction \p Ext, return the approriate /// action to promote the operand of \p Ext instead of using Ext. /// \return NULL if no promotable action is possible with the current @@ -3390,6 +3316,8 @@ public: const InstrToOrigTy &PromotedInsts); }; +} // end anonymous namespace + bool TypePromotionHelper::canGetThrough(const Instruction *Inst, Type *ConsideredExtType, const InstrToOrigTy &PromotedInsts, @@ -3488,7 +3416,7 @@ TypePromotionHelper::Action TypePromotionHelper::getAction( } Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt( - llvm::Instruction *SExt, TypePromotionTransaction &TPT, + Instruction *SExt, TypePromotionTransaction &TPT, InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, SmallVectorImpl<Instruction *> *Exts, SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) { @@ -3552,9 +3480,8 @@ Value *TypePromotionHelper::promoteOperandForOther( // Create the truncate now. Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType()); if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) { - ITrunc->removeFromParent(); // Insert it just after the definition. 
- ITrunc->insertAfter(ExtOpnd); + ITrunc->moveAfter(ExtOpnd); if (Truncs) Truncs->push_back(ITrunc); } @@ -3752,7 +3679,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode, case Instruction::Shl: { // Can only handle X*C and X << C. ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1)); - if (!RHS) + if (!RHS || RHS->getBitWidth() > 64) return false; int64_t Scale = RHS->getSExtValue(); if (Opcode == Instruction::Shl) @@ -4234,8 +4161,6 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, return true; } -} // end anonymous namespace - /// Return true if the specified values are defined in a /// different basic block than BB. static bool IsNonLocalValue(Value *V, BasicBlock *BB) { @@ -4273,13 +4198,13 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, SmallPtrSet<Value*, 16> Visited; worklist.push_back(Addr); - // Use a worklist to iteratively look through PHI nodes, and ensure that - // the addressing mode obtained from the non-PHI roots of the graph - // are equivalent. - bool AddrModeFound = false; - bool PhiSeen = false; + // Use a worklist to iteratively look through PHI and select nodes, and + // ensure that the addressing mode obtained from the non-PHI/select roots of + // the graph are compatible. + bool PhiOrSelectSeen = false; SmallVector<Instruction*, 16> AddrModeInsts; - ExtAddrMode AddrMode; + const SimplifyQuery SQ(*DL, TLInfo); + AddressingModeCombiner AddrModes(SQ, { Addr, MemoryInst->getParent() }); TypePromotionTransaction TPT(RemovedInsts); TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); @@ -4303,7 +4228,14 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, if (PHINode *P = dyn_cast<PHINode>(V)) { for (Value *IncValue : P->incoming_values()) worklist.push_back(IncValue); - PhiSeen = true; + PhiOrSelectSeen = true; + continue; + } + // Similar for select. 
+ if (SelectInst *SI = dyn_cast<SelectInst>(V)) { + worklist.push_back(SI->getFalseValue()); + worklist.push_back(SI->getTrueValue()); + PhiOrSelectSeen = true; continue; } @@ -4314,30 +4246,29 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, ExtAddrMode NewAddrMode = AddressingModeMatcher::Match( V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *TRI, InsertedInsts, PromotedInsts, TPT); + NewAddrMode.OriginalValue = V; - if (!AddrModeFound) { - AddrModeFound = true; - AddrMode = NewAddrMode; - continue; - } - if (NewAddrMode == AddrMode) - continue; - - AddrModeFound = false; - break; + if (!AddrModes.addNewAddrMode(NewAddrMode)) + break; } - // If the addressing mode couldn't be determined, or if multiple different - // ones were determined, bail out now. - if (!AddrModeFound) { + // Try to combine the AddrModes we've collected. If we couldn't collect any, + // or we have multiple but either couldn't combine them or combining them + // wouldn't do anything useful, bail out now. + if (!AddrModes.combineAddrModes()) { TPT.rollback(LastKnownGood); return false; } TPT.commit(); + // Get the combined AddrMode (or the only AddrMode, if we only had one). + ExtAddrMode AddrMode = AddrModes.getAddrMode(); + // If all the instructions matched are already in this BB, don't do anything. - // If we saw Phi node then it is not local definitely. - if (!PhiSeen && none_of(AddrModeInsts, [&](Value *V) { + // If we saw a Phi node then it is not local definitely, and if we saw a select + // then we want to push the address calculation past it even if it's already + // in this BB. 
+ if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) { return IsNonLocalValue(V, MemoryInst->getParent()); })) { DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n"); @@ -4351,9 +4282,13 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // Now that we determined the addressing expression we want to use and know // that we have to sink it into this block. Check to see if we have already - // done this for some other load/store instr in this block. If so, reuse the - // computation. - Value *&SunkAddr = SunkAddrs[Addr]; + // done this for some other load/store instr in this block. If so, reuse + // the computation. Before attempting reuse, check if the address is valid + // as it may have been erased. + + WeakTrackingVH SunkAddrVH = SunkAddrs[Addr]; + + Value * SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr; if (SunkAddr) { DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " << *MemoryInst << "\n"); @@ -4578,6 +4513,9 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, } MemoryInst->replaceUsesOfWith(Repl, SunkAddr); + // Store the newly computed address into the cache. In the case we reused a + // value, this should be idempotent. + SunkAddrs[Addr] = WeakTrackingVH(SunkAddr); // If we have no uses, recursively delete the value and all dead instructions // using it. @@ -4909,8 +4847,7 @@ bool CodeGenPrepare::optimizeExt(Instruction *&Inst) { assert(LI && ExtFedByLoad && "Expect a valid load and extension"); TPT.commit(); // Move the extend into the same block as the load - ExtFedByLoad->removeFromParent(); - ExtFedByLoad->insertAfter(LI); + ExtFedByLoad->moveAfter(LI); // CGP does not check if the zext would be speculatively executed when moved // to the same basic block as the load. 
Preserving its original location // would pessimize the debugging experience, as well as negatively impact @@ -5127,10 +5064,7 @@ bool CodeGenPrepare::optimizeExtUses(Instruction *I) { // b2: // x = phi x1', x2' // y = and x, 0xff -// - bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) { - if (!Load->isSimple() || !(Load->getType()->isIntegerTy() || Load->getType()->isPointerTy())) return false; @@ -5169,7 +5103,7 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) { } switch (I->getOpcode()) { - case llvm::Instruction::And: { + case Instruction::And: { auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1)); if (!AndC) return false; @@ -5183,7 +5117,7 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) { break; } - case llvm::Instruction::Shl: { + case Instruction::Shl: { auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1)); if (!ShlC) return false; @@ -5192,7 +5126,7 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) { break; } - case llvm::Instruction::Trunc: { + case Instruction::Trunc: { EVT TruncVT = TLI->getValueType(*DL, I->getType()); unsigned TruncBitWidth = TruncVT.getSizeInBits(); DemandBits.setLowBits(TruncBitWidth); @@ -5596,6 +5530,7 @@ bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) { namespace { + /// \brief Helper class to promote a scalar operation to a vector one. /// This class is used to move downward extractelement transition. /// E.g., @@ -5623,12 +5558,15 @@ class VectorPromoteHelper { /// The transition being moved downwards. Instruction *Transition; + /// The sequence of instructions to be promoted. SmallVector<Instruction *, 4> InstsToBePromoted; + /// Cost of combining a store and an extract. unsigned StoreExtractCombineCost; + /// Instruction that will be combined with the transition. - Instruction *CombineInst; + Instruction *CombineInst = nullptr; /// \brief The instruction that represents the current end of the transition. 
/// Since we are faking the promotion until we reach the end of the chain @@ -5734,7 +5672,7 @@ class VectorPromoteHelper { /// <undef, ..., undef, Val, undef, ..., undef> where \p Val is only /// used at the index of the extract. Value *getConstantVector(Constant *Val, bool UseSplat) const { - unsigned ExtractIdx = UINT_MAX; + unsigned ExtractIdx = std::numeric_limits<unsigned>::max(); if (!UseSplat) { // If we cannot determine where the constant must be, we have to // use a splat constant. @@ -5788,7 +5726,7 @@ public: const TargetTransformInfo &TTI, Instruction *Transition, unsigned CombineCost) : DL(DL), TLI(TLI), TTI(TTI), Transition(Transition), - StoreExtractCombineCost(CombineCost), CombineInst(nullptr) { + StoreExtractCombineCost(CombineCost) { assert(Transition && "Do not know how to promote null"); } @@ -5863,7 +5801,8 @@ public: return true; } }; -} // End of anonymous namespace. + +} // end anonymous namespace void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) { // At this point, we know that all the operands of ToBePromoted but Def @@ -5902,8 +5841,7 @@ void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) { "this?"); ToBePromoted->setOperand(U.getOperandNo(), NewVal); } - Transition->removeFromParent(); - Transition->insertAfter(ToBePromoted); + Transition->moveAfter(ToBePromoted); Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted); } @@ -5911,7 +5849,7 @@ void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) { /// Try to push the extractelement towards the stores when the target /// has this feature and this is profitable. 
bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) { - unsigned CombineCost = UINT_MAX; + unsigned CombineCost = std::numeric_limits<unsigned>::max(); if (DisableStoreExtract || !TLI || (!StressStoreExtract && !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(), @@ -6073,6 +6011,170 @@ static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, return true; } +// Return true if the GEP has two operands, the first operand is of a sequential +// type, and the second operand is a constant. +static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP) { + gep_type_iterator I = gep_type_begin(*GEP); + return GEP->getNumOperands() == 2 && + I.isSequential() && + isa<ConstantInt>(GEP->getOperand(1)); +} + +// Try unmerging GEPs to reduce liveness interference (register pressure) across +// IndirectBr edges. Since IndirectBr edges tend to touch on many blocks, +// reducing liveness interference across those edges benefits global register +// allocation. Currently handles only certain cases. +// +// For example, unmerge %GEPI and %UGEPI as below. +// +// ---------- BEFORE ---------- +// SrcBlock: +// ... +// %GEPIOp = ... +// ... +// %GEPI = gep %GEPIOp, Idx +// ... +// indirectbr ... [ label %DstB0, label %DstB1, ... label %DstBi ... ] +// (* %GEPI is alive on the indirectbr edges due to other uses ahead) +// (* %GEPIOp is alive on the indirectbr edges only because of it's used by +// %UGEPI) +// +// DstB0: ... (there may be a gep similar to %UGEPI to be unmerged) +// DstB1: ... (there may be a gep similar to %UGEPI to be unmerged) +// ... +// +// DstBi: +// ... +// %UGEPI = gep %GEPIOp, UIdx +// ... +// --------------------------- +// +// ---------- AFTER ---------- +// SrcBlock: +// ... (same as above) +// (* %GEPI is still alive on the indirectbr edges) +// (* %GEPIOp is no longer alive on the indirectbr edges as a result of the +// unmerging) +// ... +// +// DstBi: +// ... +// %UGEPI = gep %GEPI, (UIdx-Idx) +// ... 
+// --------------------------- +// +// The register pressure on the IndirectBr edges is reduced because %GEPIOp is +// no longer alive on them. +// +// We try to unmerge GEPs here in CodGenPrepare, as opposed to limiting merging +// of GEPs in the first place in InstCombiner::visitGetElementPtrInst() so as +// not to disable further simplications and optimizations as a result of GEP +// merging. +// +// Note this unmerging may increase the length of the data flow critical path +// (the path from %GEPIOp to %UGEPI would go through %GEPI), which is a tradeoff +// between the register pressure and the length of data-flow critical +// path. Restricting this to the uncommon IndirectBr case would minimize the +// impact of potentially longer critical path, if any, and the impact on compile +// time. +static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, + const TargetTransformInfo *TTI) { + BasicBlock *SrcBlock = GEPI->getParent(); + // Check that SrcBlock ends with an IndirectBr. If not, give up. The common + // (non-IndirectBr) cases exit early here. + if (!isa<IndirectBrInst>(SrcBlock->getTerminator())) + return false; + // Check that GEPI is a simple gep with a single constant index. + if (!GEPSequentialConstIndexed(GEPI)) + return false; + ConstantInt *GEPIIdx = cast<ConstantInt>(GEPI->getOperand(1)); + // Check that GEPI is a cheap one. + if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType()) + > TargetTransformInfo::TCC_Basic) + return false; + Value *GEPIOp = GEPI->getOperand(0); + // Check that GEPIOp is an instruction that's also defined in SrcBlock. + if (!isa<Instruction>(GEPIOp)) + return false; + auto *GEPIOpI = cast<Instruction>(GEPIOp); + if (GEPIOpI->getParent() != SrcBlock) + return false; + // Check that GEP is used outside the block, meaning it's alive on the + // IndirectBr edge(s). 
+ if (find_if(GEPI->users(), [&](User *Usr) { + if (auto *I = dyn_cast<Instruction>(Usr)) { + if (I->getParent() != SrcBlock) { + return true; + } + } + return false; + }) == GEPI->users().end()) + return false; + // The second elements of the GEP chains to be unmerged. + std::vector<GetElementPtrInst *> UGEPIs; + // Check each user of GEPIOp to check if unmerging would make GEPIOp not alive + // on IndirectBr edges. + for (User *Usr : GEPIOp->users()) { + if (Usr == GEPI) continue; + // Check if Usr is an Instruction. If not, give up. + if (!isa<Instruction>(Usr)) + return false; + auto *UI = cast<Instruction>(Usr); + // Check if Usr in the same block as GEPIOp, which is fine, skip. + if (UI->getParent() == SrcBlock) + continue; + // Check if Usr is a GEP. If not, give up. + if (!isa<GetElementPtrInst>(Usr)) + return false; + auto *UGEPI = cast<GetElementPtrInst>(Usr); + // Check if UGEPI is a simple gep with a single constant index and GEPIOp is + // the pointer operand to it. If so, record it in the vector. If not, give + // up. + if (!GEPSequentialConstIndexed(UGEPI)) + return false; + if (UGEPI->getOperand(0) != GEPIOp) + return false; + if (GEPIIdx->getType() != + cast<ConstantInt>(UGEPI->getOperand(1))->getType()) + return false; + ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1)); + if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType()) + > TargetTransformInfo::TCC_Basic) + return false; + UGEPIs.push_back(UGEPI); + } + if (UGEPIs.size() == 0) + return false; + // Check the materializing cost of (Uidx-Idx). + for (GetElementPtrInst *UGEPI : UGEPIs) { + ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1)); + APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue(); + unsigned ImmCost = TTI->getIntImmCost(NewIdx, GEPIIdx->getType()); + if (ImmCost > TargetTransformInfo::TCC_Basic) + return false; + } + // Now unmerge between GEPI and UGEPIs. 
+ for (GetElementPtrInst *UGEPI : UGEPIs) { + UGEPI->setOperand(0, GEPI); + ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1)); + Constant *NewUGEPIIdx = + ConstantInt::get(GEPIIdx->getType(), + UGEPIIdx->getValue() - GEPIIdx->getValue()); + UGEPI->setOperand(1, NewUGEPIIdx); + // If GEPI is not inbounds but UGEPI is inbounds, change UGEPI to not + // inbounds to avoid UB. + if (!GEPI->isInBounds()) { + UGEPI->setIsInBounds(false); + } + } + // After unmerging, verify that GEPIOp is actually only used in SrcBlock (not + // alive on IndirectBr edges). + assert(find_if(GEPIOp->users(), [&](User *Usr) { + return cast<Instruction>(Usr)->getParent() != SrcBlock; + }) == GEPIOp->users().end() && "GEPIOp is used outside SrcBlock"); + return true; +} + bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) { // Bail out if we inserted the instruction to prevent optimizations from // stepping on each other's toes. @@ -6186,6 +6288,9 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) { optimizeInst(NC, ModifiedDT); return true; } + if (tryUnmergingGEPsAcrossIndirectBr(GEPI, TTI)) { + return true; + } return false; } @@ -6266,7 +6371,7 @@ bool CodeGenPrepare::placeDbgValues(Function &F) { Instruction *Insn = &*BI++; DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn); // Leave dbg.values that refer to an alloca alone. These - // instrinsics describe the address of a variable (= the alloca) + // intrinsics describe the address of a variable (= the alloca) // being taken. They should not be moved next to the alloca // (and to the beginning of the scope), but rather stay close to // where said address is used. @@ -6298,7 +6403,7 @@ bool CodeGenPrepare::placeDbgValues(Function &F) { /// \brief Scale down both weights to fit into uint32_t. static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) { uint64_t NewMax = (NewTrue > NewFalse) ? 
NewTrue : NewFalse; - uint32_t Scale = (NewMax / UINT32_MAX) + 1; + uint32_t Scale = (NewMax / std::numeric_limits<uint32_t>::max()) + 1; NewTrue = NewTrue / Scale; NewFalse = NewFalse / Scale; } diff --git a/lib/CodeGen/CountingFunctionInserter.cpp b/lib/CodeGen/CountingFunctionInserter.cpp deleted file mode 100644 index 7f7350f5fb5c..000000000000 --- a/lib/CodeGen/CountingFunctionInserter.cpp +++ /dev/null @@ -1,62 +0,0 @@ -//===- CountingFunctionInserter.cpp - Insert mcount-like function calls ---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Insert calls to counter functions, such as mcount, intended to be called -// once per function, at the beginning of each function. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Type.h" -#include "llvm/Pass.h" -using namespace llvm; - -namespace { - struct CountingFunctionInserter : public FunctionPass { - static char ID; // Pass identification, replacement for typeid - CountingFunctionInserter() : FunctionPass(ID) { - initializeCountingFunctionInserterPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addPreserved<GlobalsAAWrapperPass>(); - } - - bool runOnFunction(Function &F) override { - std::string CountingFunctionName = - F.getFnAttribute("counting-function").getValueAsString(); - if (CountingFunctionName.empty()) - return false; - - Type *VoidTy = Type::getVoidTy(F.getContext()); - Constant *CountingFn = - F.getParent()->getOrInsertFunction(CountingFunctionName, - VoidTy); - CallInst::Create(CountingFn, "", 
&*F.begin()->getFirstInsertionPt()); - return true; - } - }; - - char CountingFunctionInserter::ID = 0; -} - -INITIALIZE_PASS(CountingFunctionInserter, "cfinserter", - "Inserts calls to mcount-like functions", false, false) - -//===----------------------------------------------------------------------===// -// -// CountingFunctionInserter - Give any unnamed non-void instructions "tmp" names. -// -FunctionPass *llvm::createCountingFunctionInserterPass() { - return new CountingFunctionInserter(); -} diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index a3cf2846d2f5..98e22b24d37a 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -1,4 +1,4 @@ -//===----- CriticalAntiDepBreaker.cpp - Anti-dep breaker -------- ---------===// +//===- CriticalAntiDepBreaker.cpp - Anti-dep breaker ----------------------===// // // The LLVM Compiler Infrastructure // @@ -14,14 +14,29 @@ //===----------------------------------------------------------------------===// #include "CriticalAntiDepBreaker.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include 
"llvm/Target/TargetSubtargetInfo.h" +#include <cassert> +#include <map> +#include <utility> +#include <vector> using namespace llvm; @@ -35,8 +50,7 @@ CriticalAntiDepBreaker::CriticalAntiDepBreaker(MachineFunction &MFi, Classes(TRI->getNumRegs(), nullptr), KillIndices(TRI->getNumRegs(), 0), DefIndices(TRI->getNumRegs(), 0), KeepRegs(TRI->getNumRegs(), false) {} -CriticalAntiDepBreaker::~CriticalAntiDepBreaker() { -} +CriticalAntiDepBreaker::~CriticalAntiDepBreaker() = default; void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { const unsigned BBSize = BB->size(); @@ -156,11 +170,11 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) { // FIXME: The issue with predicated instruction is more complex. We are being // conservative here because the kill markers cannot be trusted after // if-conversion: - // %R6<def> = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14] + // %r6 = LDR %sp, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14] // ... - // STR %R0, %R6<kill>, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395] - // %R6<def> = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12] - // STR %R0, %R6<kill>, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8) + // STR %r0, killed %r6, %reg0, 0, pred:0, pred:%cpsr; mem:ST4[%395] + // %r6 = LDR %sp, %reg0, 100, pred:0, pred:%cpsr; mem:LD4[FixedStack12] + // STR %r0, killed %r6, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8) // // The first R6 kill is not really a kill since it's killed by a predicated // instruction which may not be executed. 
The second R6 def may or may not @@ -333,8 +347,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) { bool CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin, RegRefIter RegRefEnd, - unsigned NewReg) -{ + unsigned NewReg) { for (RegRefIter I = RegRefBegin; I != RegRefEnd; ++I ) { MachineOperand *RefOper = I->second; @@ -381,8 +394,7 @@ findSuitableFreeRegister(RegRefIter RegRefBegin, unsigned AntiDepReg, unsigned LastNewReg, const TargetRegisterClass *RC, - SmallVectorImpl<unsigned> &Forbid) -{ + SmallVectorImpl<unsigned> &Forbid) { ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(RC); for (unsigned i = 0; i != Order.size(); ++i) { unsigned NewReg = Order[i]; @@ -423,7 +435,7 @@ findSuitableFreeRegister(RegRefIter RegRefBegin, } unsigned CriticalAntiDepBreaker:: -BreakAntiDependencies(const std::vector<SUnit>& SUnits, +BreakAntiDependencies(const std::vector<SUnit> &SUnits, MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned InsertPosIndex, @@ -436,7 +448,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, // This is used for updating debug information. // // FIXME: Replace this with the existing map in ScheduleDAGInstrs::MISUnitMap - DenseMap<MachineInstr*,const SUnit*> MISUnitMap; + DenseMap<MachineInstr *, const SUnit *> MISUnitMap; // Find the node at the bottom of the critical path. 
const SUnit *Max = nullptr; @@ -454,7 +466,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, DEBUG(dbgs() << "Available regs:"); for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) { if (KillIndices[Reg] == ~0u) - DEBUG(dbgs() << " " << TRI->getName(Reg)); + DEBUG(dbgs() << " " << printReg(Reg, TRI)); } DEBUG(dbgs() << '\n'); } @@ -634,9 +646,9 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, LastNewReg[AntiDepReg], RC, ForbidRegs)) { DEBUG(dbgs() << "Breaking anti-dependence edge on " - << TRI->getName(AntiDepReg) - << " with " << RegRefs.count(AntiDepReg) << " references" - << " using " << TRI->getName(NewReg) << "!\n"); + << printReg(AntiDepReg, TRI) << " with " + << RegRefs.count(AntiDepReg) << " references" + << " using " << printReg(NewReg, TRI) << "!\n"); // Update the references to the old register to refer to the new // register. diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h index 678779fa1a26..09c4423a2f05 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/lib/CodeGen/CriticalAntiDepBreaker.h @@ -1,4 +1,4 @@ -//=- llvm/CodeGen/CriticalAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-=// +//===- llvm/CodeGen/CriticalAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -18,16 +18,21 @@ #include "AntiDepBreaker.h" #include "llvm/ADT/BitVector.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/RegisterClassInfo.h" -#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Support/Compiler.h" +#include <map> +#include <vector> namespace llvm { + +class MachineBasicBlock; +class MachineFunction; +class MachineInstr; +class MachineOperand; +class MachineRegisterInfo; class RegisterClassInfo; class TargetInstrInfo; +class TargetRegisterClass; class TargetRegisterInfo; -class MachineFunction; class LLVM_LIBRARY_VISIBILITY CriticalAntiDepBreaker : public AntiDepBreaker { MachineFunction& 
MF; @@ -46,12 +51,13 @@ class LLVM_LIBRARY_VISIBILITY CriticalAntiDepBreaker : public AntiDepBreaker { /// corresponding value is null. If the register is live but used in /// multiple register classes, the corresponding value is -1 casted to a /// pointer. - std::vector<const TargetRegisterClass*> Classes; + std::vector<const TargetRegisterClass *> Classes; /// Map registers to all their references within a live range. std::multimap<unsigned, MachineOperand *> RegRefs; - typedef std::multimap<unsigned, MachineOperand *>::const_iterator - RegRefIter; + + using RegRefIter = + std::multimap<unsigned, MachineOperand *>::const_iterator; /// The index of the most recent kill (proceeding bottom-up), /// or ~0u if the register is not live. @@ -66,7 +72,7 @@ class LLVM_LIBRARY_VISIBILITY CriticalAntiDepBreaker : public AntiDepBreaker { BitVector KeepRegs; public: - CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo&); + CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI); ~CriticalAntiDepBreaker() override; /// Initialize anti-dep breaking for a new basic block. @@ -74,7 +80,7 @@ class LLVM_LIBRARY_VISIBILITY CriticalAntiDepBreaker : public AntiDepBreaker { /// Identifiy anti-dependencies along the critical path /// of the ScheduleDAG and break them by renaming registers. 
- unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits, + unsigned BreakAntiDependencies(const std::vector<SUnit> &SUnits, MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned InsertPosIndex, @@ -101,6 +107,7 @@ class LLVM_LIBRARY_VISIBILITY CriticalAntiDepBreaker : public AntiDepBreaker { const TargetRegisterClass *RC, SmallVectorImpl<unsigned> &Forbid); }; -} -#endif +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_CRITICALANTIDEPBREAKER_H diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp index 853b9afa1026..848db444270d 100644 --- a/lib/CodeGen/DFAPacketizer.cpp +++ b/lib/CodeGen/DFAPacketizer.cpp @@ -29,13 +29,13 @@ #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cassert> #include <iterator> @@ -336,6 +336,38 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, VLIWScheduler->finishBlock(); } +bool VLIWPacketizerList::alias(const MachineMemOperand &Op1, + const MachineMemOperand &Op2, + bool UseTBAA) const { + if (!Op1.getValue() || !Op2.getValue()) + return true; + + int64_t MinOffset = std::min(Op1.getOffset(), Op2.getOffset()); + int64_t Overlapa = Op1.getSize() + Op1.getOffset() - MinOffset; + int64_t Overlapb = Op2.getSize() + Op2.getOffset() - MinOffset; + + AliasResult AAResult = + AA->alias(MemoryLocation(Op1.getValue(), Overlapa, + UseTBAA ? Op1.getAAInfo() : AAMDNodes()), + MemoryLocation(Op2.getValue(), Overlapb, + UseTBAA ? 
Op2.getAAInfo() : AAMDNodes())); + + return AAResult != NoAlias; +} + +bool VLIWPacketizerList::alias(const MachineInstr &MI1, + const MachineInstr &MI2, + bool UseTBAA) const { + if (MI1.memoperands_empty() || MI2.memoperands_empty()) + return true; + + for (const MachineMemOperand *Op1 : MI1.memoperands()) + for (const MachineMemOperand *Op2 : MI2.memoperands()) + if (alias(*Op1, *Op2, UseTBAA)) + return true; + return false; +} + // Add a DAG mutation object to the ordered list. void VLIWPacketizerList::addMutation( std::unique_ptr<ScheduleDAGMutation> Mutation) { diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp index 91d18e2bcaa6..e6a54bb300f2 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -15,11 +15,10 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -95,7 +94,7 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { } bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; bool AnyChanges = false; diff --git a/lib/CodeGen/DetectDeadLanes.cpp b/lib/CodeGen/DetectDeadLanes.cpp index ab9a0592e017..7d7eb57352a2 100644 --- a/lib/CodeGen/DetectDeadLanes.cpp +++ b/lib/CodeGen/DetectDeadLanes.cpp @@ -17,12 +17,12 @@ /// when subregisters are involved. 
/// /// Example: -/// %vreg0 = some definition -/// %vreg1 = IMPLICIT_DEF -/// %vreg2 = REG_SEQUENCE %vreg0, sub0, %vreg1, sub1 -/// %vreg3 = EXTRACT_SUBREG %vreg2, sub1 -/// = use %vreg3 -/// The %vreg0 definition is dead and %vreg3 contains an undefined value. +/// %0 = some definition +/// %1 = IMPLICIT_DEF +/// %2 = REG_SEQUENCE %0, sub0, %1, sub1 +/// %3 = EXTRACT_SUBREG %2, sub1 +/// = use %3 +/// The %0 definition is dead and %3 contains an undefined value. // //===----------------------------------------------------------------------===// @@ -34,14 +34,13 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/PassRegistry.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -526,7 +525,7 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) { for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) { unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx); const VRegInfo &Info = VRegInfos[RegIdx]; - dbgs() << PrintReg(Reg, nullptr) + dbgs() << printReg(Reg, nullptr) << " Used: " << PrintLaneMask(Info.UsedLanes) << " Def: " << PrintLaneMask(Info.DefinedLanes) << '\n'; } diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index 2f833260bca2..39d80c0bf9bd 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -1,4 +1,4 @@ -//===-- DwarfEHPrepare - Prepare exception handling for code generation ---===// +//===- DwarfEHPrepare - Prepare exception handling for code generation ----===// // // The LLVM Compiler Infrastructure // @@ -13,20 +13,29 @@ 
//===----------------------------------------------------------------------===// #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" #include "llvm/Pass.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Support/Casting.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/Local.h" +#include <cstddef> + using namespace llvm; #define DEBUG_TYPE "dwarfehprepare" @@ -34,12 +43,13 @@ using namespace llvm; STATISTIC(NumResumesLowered, "Number of resume calls lowered"); namespace { + class DwarfEHPrepare : public FunctionPass { // RewindFunction - _Unwind_Resume or the target equivalent. - Constant *RewindFunction; + Constant *RewindFunction = nullptr; - DominatorTree *DT; - const TargetLowering *TLI; + DominatorTree *DT = nullptr; + const TargetLowering *TLI = nullptr; bool InsertUnwindResumeCalls(Function &Fn); Value *GetExceptionObject(ResumeInst *RI); @@ -51,9 +61,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid. 
- DwarfEHPrepare() - : FunctionPass(ID), RewindFunction(nullptr), DT(nullptr), TLI(nullptr) { - } + DwarfEHPrepare() : FunctionPass(ID) {} bool runOnFunction(Function &Fn) override; @@ -68,9 +76,11 @@ namespace { return "Exception handling preparation"; } }; + } // end anonymous namespace char DwarfEHPrepare::ID = 0; + INITIALIZE_PASS_BEGIN(DwarfEHPrepare, DEBUG_TYPE, "Prepare DWARF exceptions", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) @@ -162,7 +172,7 @@ size_t DwarfEHPrepare::pruneUnreachableResumes( BasicBlock *BB = RI->getParent(); new UnreachableInst(Ctx, RI); RI->eraseFromParent(); - SimplifyCFG(BB, TTI, 1); + simplifyCFG(BB, TTI); } } Resumes.resize(ResumesLeft); diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp index 402afe75b141..6294ff450113 100644 --- a/lib/CodeGen/EarlyIfConversion.cpp +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -30,12 +30,12 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineTraceMetrics.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -185,7 +185,7 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) { // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to // get right. 
if (!MBB->livein_empty()) { - DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has live-ins.\n"); + DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n"); return false; } @@ -199,7 +199,7 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) { continue; if (++InstrCount > BlockInstrLimit && !Stress) { - DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has more than " + DEBUG(dbgs() << printMBBReference(*MBB) << " has more than " << BlockInstrLimit << " instructions.\n"); return false; } @@ -246,7 +246,7 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) { if (!DefMI || DefMI->getParent() != Head) continue; if (InsertAfter.insert(DefMI).second) - DEBUG(dbgs() << "BB#" << MBB->getNumber() << " depends on " << *DefMI); + DEBUG(dbgs() << printMBBReference(*MBB) << " depends on " << *DefMI); if (DefMI->isTerminator()) { DEBUG(dbgs() << "Can't insert instructions below terminator.\n"); return false; @@ -317,7 +317,7 @@ bool SSAIfConv::findInsertionPoint() { dbgs() << "Would clobber"; for (SparseSet<unsigned>::const_iterator i = LiveRegUnits.begin(), e = LiveRegUnits.end(); i != e; ++i) - dbgs() << ' ' << PrintRegUnit(*i, TRI); + dbgs() << ' ' << printRegUnit(*i, TRI); dbgs() << " live before " << *I; }); continue; @@ -361,10 +361,10 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { if (Succ1->pred_size() != 1 || Succ1->succ_size() != 1 || Succ1->succ_begin()[0] != Tail) return false; - DEBUG(dbgs() << "\nDiamond: BB#" << Head->getNumber() - << " -> BB#" << Succ0->getNumber() - << "/BB#" << Succ1->getNumber() - << " -> BB#" << Tail->getNumber() << '\n'); + DEBUG(dbgs() << "\nDiamond: " << printMBBReference(*Head) << " -> " + << printMBBReference(*Succ0) << "/" + << printMBBReference(*Succ1) << " -> " + << printMBBReference(*Tail) << '\n'); // Live-in physregs are tricky to get right when speculating code. 
if (!Tail->livein_empty()) { @@ -372,9 +372,9 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { return false; } } else { - DEBUG(dbgs() << "\nTriangle: BB#" << Head->getNumber() - << " -> BB#" << Succ0->getNumber() - << " -> BB#" << Tail->getNumber() << '\n'); + DEBUG(dbgs() << "\nTriangle: " << printMBBReference(*Head) << " -> " + << printMBBReference(*Succ0) << " -> " + << printMBBReference(*Tail) << '\n'); } // This is a triangle or a diamond. @@ -563,8 +563,8 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) { assert(Head->succ_empty() && "Additional head successors?"); if (!ExtraPreds && Head->isLayoutSuccessor(Tail)) { // Splice Tail onto the end of Head. - DEBUG(dbgs() << "Joining tail BB#" << Tail->getNumber() - << " into head BB#" << Head->getNumber() << '\n'); + DEBUG(dbgs() << "Joining tail " << printMBBReference(*Tail) << " into head " + << printMBBReference(*Head) << '\n'); Head->splice(Head->end(), Tail, Tail->begin(), Tail->end()); Head->transferSuccessorsAndUpdatePHIs(Tail); @@ -785,7 +785,7 @@ bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) { bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n" << "********** Function: " << MF.getName() << '\n'); - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; // Only run if conversion if the target wants it. 
diff --git a/lib/CodeGen/EdgeBundles.cpp b/lib/CodeGen/EdgeBundles.cpp index b3a25544be39..54c53eb16312 100644 --- a/lib/CodeGen/EdgeBundles.cpp +++ b/lib/CodeGen/EdgeBundles.cpp @@ -80,13 +80,15 @@ raw_ostream &WriteGraph<>(raw_ostream &O, const EdgeBundles &G, O << "digraph {\n"; for (const auto &MBB : *MF) { unsigned BB = MBB.getNumber(); - O << "\t\"BB#" << BB << "\" [ shape=box ]\n" - << '\t' << G.getBundle(BB, false) << " -> \"BB#" << BB << "\"\n" - << "\t\"BB#" << BB << "\" -> " << G.getBundle(BB, true) << '\n'; + O << "\t\"" << printMBBReference(MBB) << "\" [ shape=box ]\n" + << '\t' << G.getBundle(BB, false) << " -> \"" << printMBBReference(MBB) + << "\"\n" + << "\t\"" << printMBBReference(MBB) << "\" -> " << G.getBundle(BB, true) + << '\n'; for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI) - O << "\t\"BB#" << BB << "\" -> \"BB#" << (*SI)->getNumber() - << "\" [ color=lightgray ]\n"; + O << "\t\"" << printMBBReference(MBB) << "\" -> \"" + << printMBBReference(**SI) << "\" [ color=lightgray ]\n"; } O << "}\n"; return O; diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index e272d25047e6..61ec3f4be1dc 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -15,11 +15,11 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -200,7 +200,7 @@ void ExecutionDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { LiveRegs[rx].Def = -1; } } - DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": entry\n"); + DEBUG(dbgs() << printMBBReference(*MBB) << ": entry\n"); return; 
} @@ -246,7 +246,7 @@ void ExecutionDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { } } DEBUG( - dbgs() << "BB#" << MBB->getNumber() + dbgs() << printMBBReference(*MBB) << (!isBlockDone(MBB) ? ": incomplete\n" : ": all preds known\n")); } @@ -394,7 +394,7 @@ void ExecutionDepsFix::processDefs(MachineInstr *MI, bool breakDependency, continue; for (int rx : regIndices(MO.getReg())) { // This instruction explicitly defines rx. - DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr + DEBUG(dbgs() << printReg(RC->getRegister(rx), TRI) << ":\t" << CurInstr << '\t' << *MI); if (breakDependency) { @@ -617,7 +617,7 @@ bool ExecutionDepsFix::isBlockDone(MachineBasicBlock *MBB) { } bool ExecutionDepsFix::runOnMachineFunction(MachineFunction &mf) { - if (skipFunction(*mf.getFunction())) + if (skipFunction(mf.getFunction())) return false; MF = &mf; TII = MF->getSubtarget().getInstrInfo(); diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp index 324ea171293d..ec586a2caea3 100644 --- a/lib/CodeGen/ExpandISelPseudos.cpp +++ b/lib/CodeGen/ExpandISelPseudos.cpp @@ -17,9 +17,9 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Support/Debug.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "expand-isel-pseudos" diff --git a/lib/CodeGen/ExpandMemCmp.cpp b/lib/CodeGen/ExpandMemCmp.cpp new file mode 100644 index 000000000000..09c808463a41 --- /dev/null +++ b/lib/CodeGen/ExpandMemCmp.cpp @@ -0,0 +1,825 @@ +//===--- ExpandMemCmp.cpp - Expand memcmp() to load/stores ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This pass tries to expand memcmp() calls into optimally-sized loads and +// compares for the target. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/IRBuilder.h" + +using namespace llvm; + +#define DEBUG_TYPE "expandmemcmp" + +STATISTIC(NumMemCmpCalls, "Number of memcmp calls"); +STATISTIC(NumMemCmpNotConstant, "Number of memcmp calls without constant size"); +STATISTIC(NumMemCmpGreaterThanMax, + "Number of memcmp calls with size greater than max size"); +STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls"); + +static cl::opt<unsigned> MemCmpNumLoadsPerBlock( + "memcmp-num-loads-per-block", cl::Hidden, cl::init(1), + cl::desc("The number of loads per basic block for inline expansion of " + "memcmp that is only being compared against zero.")); + +namespace { + + +// This class provides helper functions to expand a memcmp library call into an +// inline expansion. +class MemCmpExpansion { + struct ResultBlock { + BasicBlock *BB = nullptr; + PHINode *PhiSrc1 = nullptr; + PHINode *PhiSrc2 = nullptr; + + ResultBlock() = default; + }; + + CallInst *const CI; + ResultBlock ResBlock; + const uint64_t Size; + unsigned MaxLoadSize; + uint64_t NumLoadsNonOneByte; + const uint64_t NumLoadsPerBlock; + std::vector<BasicBlock *> LoadCmpBlocks; + BasicBlock *EndBlock; + PHINode *PhiRes; + const bool IsUsedForZeroCmp; + const DataLayout &DL; + IRBuilder<> Builder; + // Represents the decomposition in blocks of the expansion. 
For example, + // comparing 33 bytes on X86+sse can be done with 2x16-byte loads and + // 1x1-byte load, which would be represented as [{16, 0}, {16, 16}, {32, 1}. + // TODO(courbet): Involve the target more in this computation. On X86, 7 + // bytes can be done more efficiently with two overlaping 4-byte loads than + // covering the interval with [{4, 0},{2, 4},{1, 6}}. + struct LoadEntry { + LoadEntry(unsigned LoadSize, uint64_t Offset) + : LoadSize(LoadSize), Offset(Offset) { + assert(Offset % LoadSize == 0 && "invalid load entry"); + } + + uint64_t getGEPIndex() const { return Offset / LoadSize; } + + // The size of the load for this block, in bytes. + const unsigned LoadSize; + // The offset of this load WRT the base pointer, in bytes. + const uint64_t Offset; + }; + SmallVector<LoadEntry, 8> LoadSequence; + + void createLoadCmpBlocks(); + void createResultBlock(); + void setupResultBlockPHINodes(); + void setupEndBlockPHINodes(); + Value *getCompareLoadPairs(unsigned BlockIndex, unsigned &LoadIndex); + void emitLoadCompareBlock(unsigned BlockIndex); + void emitLoadCompareBlockMultipleLoads(unsigned BlockIndex, + unsigned &LoadIndex); + void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned GEPIndex); + void emitMemCmpResultBlock(); + Value *getMemCmpExpansionZeroCase(); + Value *getMemCmpEqZeroOneBlock(); + Value *getMemCmpOneBlock(); + + public: + MemCmpExpansion(CallInst *CI, uint64_t Size, + const TargetTransformInfo::MemCmpExpansionOptions &Options, + unsigned MaxNumLoads, const bool IsUsedForZeroCmp, + unsigned NumLoadsPerBlock, const DataLayout &DL); + + unsigned getNumBlocks(); + uint64_t getNumLoads() const { return LoadSequence.size(); } + + Value *getMemCmpExpansion(); +}; + +// Initialize the basic block structure required for expansion of memcmp call +// with given maximum load size and memcmp size parameter. +// This structure includes: +// 1. A list of load compare blocks - LoadCmpBlocks. +// 2. 
An EndBlock, split from original instruction point, which is the block to +// return from. +// 3. ResultBlock, block to branch to for early exit when a +// LoadCmpBlock finds a difference. +MemCmpExpansion::MemCmpExpansion( + CallInst *const CI, uint64_t Size, + const TargetTransformInfo::MemCmpExpansionOptions &Options, + const unsigned MaxNumLoads, const bool IsUsedForZeroCmp, + const unsigned NumLoadsPerBlock, const DataLayout &TheDataLayout) + : CI(CI), + Size(Size), + MaxLoadSize(0), + NumLoadsNonOneByte(0), + NumLoadsPerBlock(NumLoadsPerBlock), + IsUsedForZeroCmp(IsUsedForZeroCmp), + DL(TheDataLayout), + Builder(CI) { + assert(Size > 0 && "zero blocks"); + // Scale the max size down if the target can load more bytes than we need. + size_t LoadSizeIndex = 0; + while (LoadSizeIndex < Options.LoadSizes.size() && + Options.LoadSizes[LoadSizeIndex] > Size) { + ++LoadSizeIndex; + } + this->MaxLoadSize = Options.LoadSizes[LoadSizeIndex]; + // Compute the decomposition. + uint64_t CurSize = Size; + uint64_t Offset = 0; + while (CurSize && LoadSizeIndex < Options.LoadSizes.size()) { + const unsigned LoadSize = Options.LoadSizes[LoadSizeIndex]; + assert(LoadSize > 0 && "zero load size"); + const uint64_t NumLoadsForThisSize = CurSize / LoadSize; + if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) { + // Do not expand if the total number of loads is larger than what the + // target allows. Note that it's important that we exit before completing + // the expansion to avoid using a ton of memory to store the expansion for + // large sizes. 
+ LoadSequence.clear(); + return; + } + if (NumLoadsForThisSize > 0) { + for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) { + LoadSequence.push_back({LoadSize, Offset}); + Offset += LoadSize; + } + if (LoadSize > 1) { + ++NumLoadsNonOneByte; + } + CurSize = CurSize % LoadSize; + } + ++LoadSizeIndex; + } + assert(LoadSequence.size() <= MaxNumLoads && "broken invariant"); +} + +unsigned MemCmpExpansion::getNumBlocks() { + if (IsUsedForZeroCmp) + return getNumLoads() / NumLoadsPerBlock + + (getNumLoads() % NumLoadsPerBlock != 0 ? 1 : 0); + return getNumLoads(); +} + +void MemCmpExpansion::createLoadCmpBlocks() { + for (unsigned i = 0; i < getNumBlocks(); i++) { + BasicBlock *BB = BasicBlock::Create(CI->getContext(), "loadbb", + EndBlock->getParent(), EndBlock); + LoadCmpBlocks.push_back(BB); + } +} + +void MemCmpExpansion::createResultBlock() { + ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block", + EndBlock->getParent(), EndBlock); +} + +// This function creates the IR instructions for loading and comparing 1 byte. +// It loads 1 byte from each source of the memcmp parameters with the given +// GEPIndex. It then subtracts the two loaded values and adds this result to the +// final phi node for selecting the memcmp result. +void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex, + unsigned GEPIndex) { + Value *Source1 = CI->getArgOperand(0); + Value *Source2 = CI->getArgOperand(1); + + Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]); + Type *LoadSizeType = Type::getInt8Ty(CI->getContext()); + // Cast source to LoadSizeType*. + if (Source1->getType() != LoadSizeType) + Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); + if (Source2->getType() != LoadSizeType) + Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); + + // Get the base address using the GEPIndex. 
+ if (GEPIndex != 0) { + Source1 = Builder.CreateGEP(LoadSizeType, Source1, + ConstantInt::get(LoadSizeType, GEPIndex)); + Source2 = Builder.CreateGEP(LoadSizeType, Source2, + ConstantInt::get(LoadSizeType, GEPIndex)); + } + + Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); + Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); + + LoadSrc1 = Builder.CreateZExt(LoadSrc1, Type::getInt32Ty(CI->getContext())); + LoadSrc2 = Builder.CreateZExt(LoadSrc2, Type::getInt32Ty(CI->getContext())); + Value *Diff = Builder.CreateSub(LoadSrc1, LoadSrc2); + + PhiRes->addIncoming(Diff, LoadCmpBlocks[BlockIndex]); + + if (BlockIndex < (LoadCmpBlocks.size() - 1)) { + // Early exit branch if difference found to EndBlock. Otherwise, continue to + // next LoadCmpBlock, + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff, + ConstantInt::get(Diff->getType(), 0)); + BranchInst *CmpBr = + BranchInst::Create(EndBlock, LoadCmpBlocks[BlockIndex + 1], Cmp); + Builder.Insert(CmpBr); + } else { + // The last block has an unconditional branch to EndBlock. + BranchInst *CmpBr = BranchInst::Create(EndBlock); + Builder.Insert(CmpBr); + } +} + +/// Generate an equality comparison for one or more pairs of loaded values. +/// This is used in the case where the memcmp() call is compared equal or not +/// equal to zero. +Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex, + unsigned &LoadIndex) { + assert(LoadIndex < getNumLoads() && + "getCompareLoadPairs() called with no remaining loads"); + std::vector<Value *> XorList, OrList; + Value *Diff; + + const unsigned NumLoads = + std::min(getNumLoads() - LoadIndex, NumLoadsPerBlock); + + // For a single-block expansion, start inserting before the memcmp call. + if (LoadCmpBlocks.empty()) + Builder.SetInsertPoint(CI); + else + Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]); + + Value *Cmp = nullptr; + // If we have multiple loads per block, we need to generate a composite + // comparison using xor+or. 
The type for the combinations is the largest load + // type. + IntegerType *const MaxLoadType = + NumLoads == 1 ? nullptr + : IntegerType::get(CI->getContext(), MaxLoadSize * 8); + for (unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) { + const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex]; + + IntegerType *LoadSizeType = + IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8); + + Value *Source1 = CI->getArgOperand(0); + Value *Source2 = CI->getArgOperand(1); + + // Cast source to LoadSizeType*. + if (Source1->getType() != LoadSizeType) + Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); + if (Source2->getType() != LoadSizeType) + Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); + + // Get the base address using a GEP. + if (CurLoadEntry.Offset != 0) { + Source1 = Builder.CreateGEP( + LoadSizeType, Source1, + ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex())); + Source2 = Builder.CreateGEP( + LoadSizeType, Source2, + ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex())); + } + + // Get a constant or load a value for each source address. + Value *LoadSrc1 = nullptr; + if (auto *Source1C = dyn_cast<Constant>(Source1)) + LoadSrc1 = ConstantFoldLoadFromConstPtr(Source1C, LoadSizeType, DL); + if (!LoadSrc1) + LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); + + Value *LoadSrc2 = nullptr; + if (auto *Source2C = dyn_cast<Constant>(Source2)) + LoadSrc2 = ConstantFoldLoadFromConstPtr(Source2C, LoadSizeType, DL); + if (!LoadSrc2) + LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); + + if (NumLoads != 1) { + if (LoadSizeType != MaxLoadType) { + LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType); + LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType); + } + // If we have multiple loads per block, we need to generate a composite + // comparison using xor+or. 
+ Diff = Builder.CreateXor(LoadSrc1, LoadSrc2); + Diff = Builder.CreateZExt(Diff, MaxLoadType); + XorList.push_back(Diff); + } else { + // If there's only one load per block, we just compare the loaded values. + Cmp = Builder.CreateICmpNE(LoadSrc1, LoadSrc2); + } + } + + auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> { + std::vector<Value *> OutList; + for (unsigned i = 0; i < InList.size() - 1; i = i + 2) { + Value *Or = Builder.CreateOr(InList[i], InList[i + 1]); + OutList.push_back(Or); + } + if (InList.size() % 2 != 0) + OutList.push_back(InList.back()); + return OutList; + }; + + if (!Cmp) { + // Pairwise OR the XOR results. + OrList = pairWiseOr(XorList); + + // Pairwise OR the OR results until one result left. + while (OrList.size() != 1) { + OrList = pairWiseOr(OrList); + } + Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0)); + } + + return Cmp; +} + +void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(unsigned BlockIndex, + unsigned &LoadIndex) { + Value *Cmp = getCompareLoadPairs(BlockIndex, LoadIndex); + + BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1)) + ? EndBlock + : LoadCmpBlocks[BlockIndex + 1]; + // Early exit branch if difference found to ResultBlock. Otherwise, + // continue to next LoadCmpBlock or EndBlock. + BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp); + Builder.Insert(CmpBr); + + // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0 + // since early exit to ResultBlock was not taken (no difference was found in + // any of the bytes). + if (BlockIndex == LoadCmpBlocks.size() - 1) { + Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0); + PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]); + } +} + +// This function creates the IR instructions for loading and comparing using the +// given LoadSize. It loads the number of bytes specified by LoadSize from each +// source of the memcmp parameters. 
It then does a subtract to see if there was +// a difference in the loaded values. If a difference is found, it branches +// with an early exit to the ResultBlock for calculating which source was +// larger. Otherwise, it falls through to either the next LoadCmpBlock or +// the EndBlock if this is the last LoadCmpBlock. Loading 1 byte is handled with +// a special case through emitLoadCompareByteBlock. The special handling can +// simply subtract the loaded values and add it to the result phi node. +void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) { + // There is one load per block in this case, BlockIndex == LoadIndex. + const LoadEntry &CurLoadEntry = LoadSequence[BlockIndex]; + + if (CurLoadEntry.LoadSize == 1) { + MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex, + CurLoadEntry.getGEPIndex()); + return; + } + + Type *LoadSizeType = + IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8); + Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); + assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type"); + + Value *Source1 = CI->getArgOperand(0); + Value *Source2 = CI->getArgOperand(1); + + Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]); + // Cast source to LoadSizeType*. + if (Source1->getType() != LoadSizeType) + Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); + if (Source2->getType() != LoadSizeType) + Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); + + // Get the base address using a GEP. + if (CurLoadEntry.Offset != 0) { + Source1 = Builder.CreateGEP( + LoadSizeType, Source1, + ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex())); + Source2 = Builder.CreateGEP( + LoadSizeType, Source2, + ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex())); + } + + // Load LoadSizeType from the base address. 
+ Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); + Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); + + if (DL.isLittleEndian()) { + Function *Bswap = Intrinsic::getDeclaration(CI->getModule(), + Intrinsic::bswap, LoadSizeType); + LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1); + LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2); + } + + if (LoadSizeType != MaxLoadType) { + LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType); + LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType); + } + + // Add the loaded values to the phi nodes for calculating memcmp result only + // if result is not used in a zero equality. + if (!IsUsedForZeroCmp) { + ResBlock.PhiSrc1->addIncoming(LoadSrc1, LoadCmpBlocks[BlockIndex]); + ResBlock.PhiSrc2->addIncoming(LoadSrc2, LoadCmpBlocks[BlockIndex]); + } + + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, LoadSrc1, LoadSrc2); + BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1)) + ? EndBlock + : LoadCmpBlocks[BlockIndex + 1]; + // Early exit branch if difference found to ResultBlock. Otherwise, continue + // to next LoadCmpBlock or EndBlock. + BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp); + Builder.Insert(CmpBr); + + // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0 + // since early exit to ResultBlock was not taken (no difference was found in + // any of the bytes). + if (BlockIndex == LoadCmpBlocks.size() - 1) { + Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0); + PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]); + } +} + +// This function populates the ResultBlock with a sequence to calculate the +// memcmp result. It compares the two loaded source values and returns -1 if +// src1 < src2 and 1 if src1 > src2. +void MemCmpExpansion::emitMemCmpResultBlock() { + // Special case: if memcmp result is used in a zero equality, result does not + // need to be calculated and can simply return 1. 
+ if (IsUsedForZeroCmp) { + BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt(); + Builder.SetInsertPoint(ResBlock.BB, InsertPt); + Value *Res = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 1); + PhiRes->addIncoming(Res, ResBlock.BB); + BranchInst *NewBr = BranchInst::Create(EndBlock); + Builder.Insert(NewBr); + return; + } + BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt(); + Builder.SetInsertPoint(ResBlock.BB, InsertPt); + + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, ResBlock.PhiSrc1, + ResBlock.PhiSrc2); + + Value *Res = + Builder.CreateSelect(Cmp, ConstantInt::get(Builder.getInt32Ty(), -1), + ConstantInt::get(Builder.getInt32Ty(), 1)); + + BranchInst *NewBr = BranchInst::Create(EndBlock); + Builder.Insert(NewBr); + PhiRes->addIncoming(Res, ResBlock.BB); +} + +void MemCmpExpansion::setupResultBlockPHINodes() { + Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); + Builder.SetInsertPoint(ResBlock.BB); + // Note: this assumes one load per block. + ResBlock.PhiSrc1 = + Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src1"); + ResBlock.PhiSrc2 = + Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src2"); +} + +void MemCmpExpansion::setupEndBlockPHINodes() { + Builder.SetInsertPoint(&EndBlock->front()); + PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res"); +} + +Value *MemCmpExpansion::getMemCmpExpansionZeroCase() { + unsigned LoadIndex = 0; + // This loop populates each of the LoadCmpBlocks with the IR sequence to + // handle multiple loads per block. + for (unsigned I = 0; I < getNumBlocks(); ++I) { + emitLoadCompareBlockMultipleLoads(I, LoadIndex); + } + + emitMemCmpResultBlock(); + return PhiRes; +} + +/// A memcmp expansion that compares equality with 0 and only has one block of +/// load and compare can bypass the compare, branch, and phi IR that is required +/// in the general case. 
+Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() { + unsigned LoadIndex = 0; + Value *Cmp = getCompareLoadPairs(0, LoadIndex); + assert(LoadIndex == getNumLoads() && "some entries were not consumed"); + return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext())); +} + +/// A memcmp expansion that only has one block of load and compare can bypass +/// the compare, branch, and phi IR that is required in the general case. +Value *MemCmpExpansion::getMemCmpOneBlock() { + assert(NumLoadsPerBlock == 1 && "Only handles one load pair per block"); + + Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8); + Value *Source1 = CI->getArgOperand(0); + Value *Source2 = CI->getArgOperand(1); + + // Cast source to LoadSizeType*. + if (Source1->getType() != LoadSizeType) + Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); + if (Source2->getType() != LoadSizeType) + Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); + + // Load LoadSizeType from the base address. + Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); + Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); + + if (DL.isLittleEndian() && Size != 1) { + Function *Bswap = Intrinsic::getDeclaration(CI->getModule(), + Intrinsic::bswap, LoadSizeType); + LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1); + LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2); + } + + if (Size < 4) { + // The i8 and i16 cases don't need compares. We zext the loaded values and + // subtract them to get the suitable negative, zero, or positive i32 result. + LoadSrc1 = Builder.CreateZExt(LoadSrc1, Builder.getInt32Ty()); + LoadSrc2 = Builder.CreateZExt(LoadSrc2, Builder.getInt32Ty()); + return Builder.CreateSub(LoadSrc1, LoadSrc2); + } + + // The result of memcmp is negative, zero, or positive, so produce that by + // subtracting 2 extended compare bits: sub (ugt, ult). + // If a target prefers to use selects to get -1/0/1, they should be able + // to transform this later. 
The inverse transform (going from selects to math) + // may not be possible in the DAG because the selects got converted into + // branches before we got there. + Value *CmpUGT = Builder.CreateICmpUGT(LoadSrc1, LoadSrc2); + Value *CmpULT = Builder.CreateICmpULT(LoadSrc1, LoadSrc2); + Value *ZextUGT = Builder.CreateZExt(CmpUGT, Builder.getInt32Ty()); + Value *ZextULT = Builder.CreateZExt(CmpULT, Builder.getInt32Ty()); + return Builder.CreateSub(ZextUGT, ZextULT); +} + +// This function expands the memcmp call into an inline expansion and returns +// the memcmp result. +Value *MemCmpExpansion::getMemCmpExpansion() { + // A memcmp with zero-comparison with only one block of load and compare does + // not need to set up any extra blocks. This case could be handled in the DAG, + // but since we have all of the machinery to flexibly expand any memcmp here, + // we choose to handle this case too to avoid fragmented lowering. + if ((!IsUsedForZeroCmp && NumLoadsPerBlock != 1) || getNumBlocks() != 1) { + BasicBlock *StartBlock = CI->getParent(); + EndBlock = StartBlock->splitBasicBlock(CI, "endblock"); + setupEndBlockPHINodes(); + createResultBlock(); + + // If return value of memcmp is not used in a zero equality, we need to + // calculate which source was larger. The calculation requires the + // two loaded source values of each load compare block. + // These will be saved in the phi nodes created by setupResultBlockPHINodes. + if (!IsUsedForZeroCmp) setupResultBlockPHINodes(); + + // Create the number of required load compare basic blocks. + createLoadCmpBlocks(); + + // Update the terminator added by splitBasicBlock to branch to the first + // LoadCmpBlock. + StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]); + } + + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + + if (IsUsedForZeroCmp) + return getNumBlocks() == 1 ? 
getMemCmpEqZeroOneBlock() + : getMemCmpExpansionZeroCase(); + + // TODO: Handle more than one load pair per block in getMemCmpOneBlock(). + if (getNumBlocks() == 1 && NumLoadsPerBlock == 1) return getMemCmpOneBlock(); + + for (unsigned I = 0; I < getNumBlocks(); ++I) { + emitLoadCompareBlock(I); + } + + emitMemCmpResultBlock(); + return PhiRes; +} + +// This function checks to see if an expansion of memcmp can be generated. +// It checks for constant compare size that is less than the max inline size. +// If an expansion cannot occur, returns false to leave as a library call. +// Otherwise, the library call is replaced with a new IR instruction sequence. +/// We want to transform: +/// %call = call signext i32 @memcmp(i8* %0, i8* %1, i64 15) +/// To: +/// loadbb: +/// %0 = bitcast i32* %buffer2 to i8* +/// %1 = bitcast i32* %buffer1 to i8* +/// %2 = bitcast i8* %1 to i64* +/// %3 = bitcast i8* %0 to i64* +/// %4 = load i64, i64* %2 +/// %5 = load i64, i64* %3 +/// %6 = call i64 @llvm.bswap.i64(i64 %4) +/// %7 = call i64 @llvm.bswap.i64(i64 %5) +/// %8 = sub i64 %6, %7 +/// %9 = icmp ne i64 %8, 0 +/// br i1 %9, label %res_block, label %loadbb1 +/// res_block: ; preds = %loadbb2, +/// %loadbb1, %loadbb +/// %phi.src1 = phi i64 [ %6, %loadbb ], [ %22, %loadbb1 ], [ %36, %loadbb2 ] +/// %phi.src2 = phi i64 [ %7, %loadbb ], [ %23, %loadbb1 ], [ %37, %loadbb2 ] +/// %10 = icmp ult i64 %phi.src1, %phi.src2 +/// %11 = select i1 %10, i32 -1, i32 1 +/// br label %endblock +/// loadbb1: ; preds = %loadbb +/// %12 = bitcast i32* %buffer2 to i8* +/// %13 = bitcast i32* %buffer1 to i8* +/// %14 = bitcast i8* %13 to i32* +/// %15 = bitcast i8* %12 to i32* +/// %16 = getelementptr i32, i32* %14, i32 2 +/// %17 = getelementptr i32, i32* %15, i32 2 +/// %18 = load i32, i32* %16 +/// %19 = load i32, i32* %17 +/// %20 = call i32 @llvm.bswap.i32(i32 %18) +/// %21 = call i32 @llvm.bswap.i32(i32 %19) +/// %22 = zext i32 %20 to i64 +/// %23 = zext i32 %21 to i64 +/// %24 = sub i64 %22, 
%23 +/// %25 = icmp ne i64 %24, 0 +/// br i1 %25, label %res_block, label %loadbb2 +/// loadbb2: ; preds = %loadbb1 +/// %26 = bitcast i32* %buffer2 to i8* +/// %27 = bitcast i32* %buffer1 to i8* +/// %28 = bitcast i8* %27 to i16* +/// %29 = bitcast i8* %26 to i16* +/// %30 = getelementptr i16, i16* %28, i16 6 +/// %31 = getelementptr i16, i16* %29, i16 6 +/// %32 = load i16, i16* %30 +/// %33 = load i16, i16* %31 +/// %34 = call i16 @llvm.bswap.i16(i16 %32) +/// %35 = call i16 @llvm.bswap.i16(i16 %33) +/// %36 = zext i16 %34 to i64 +/// %37 = zext i16 %35 to i64 +/// %38 = sub i64 %36, %37 +/// %39 = icmp ne i64 %38, 0 +/// br i1 %39, label %res_block, label %loadbb3 +/// loadbb3: ; preds = %loadbb2 +/// %40 = bitcast i32* %buffer2 to i8* +/// %41 = bitcast i32* %buffer1 to i8* +/// %42 = getelementptr i8, i8* %41, i8 14 +/// %43 = getelementptr i8, i8* %40, i8 14 +/// %44 = load i8, i8* %42 +/// %45 = load i8, i8* %43 +/// %46 = zext i8 %44 to i32 +/// %47 = zext i8 %45 to i32 +/// %48 = sub i32 %46, %47 +/// br label %endblock +/// endblock: ; preds = %res_block, +/// %loadbb3 +/// %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ] +/// ret i32 %phi.res +static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI, + const TargetLowering *TLI, const DataLayout *DL) { + NumMemCmpCalls++; + + // Early exit from expansion if -Oz. + if (CI->getFunction()->optForMinSize()) + return false; + + // Early exit from expansion if size is not a constant. + ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2)); + if (!SizeCast) { + NumMemCmpNotConstant++; + return false; + } + const uint64_t SizeVal = SizeCast->getZExtValue(); + + if (SizeVal == 0) { + return false; + } + + // TTI call to check if target would like to expand memcmp. Also, get the + // available load sizes. 
+ const bool IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI); + const auto *const Options = TTI->enableMemCmpExpansion(IsUsedForZeroCmp); + if (!Options) return false; + + const unsigned MaxNumLoads = + TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize()); + + MemCmpExpansion Expansion(CI, SizeVal, *Options, MaxNumLoads, + IsUsedForZeroCmp, MemCmpNumLoadsPerBlock, *DL); + + // Don't expand if this will require more loads than desired by the target. + if (Expansion.getNumLoads() == 0) { + NumMemCmpGreaterThanMax++; + return false; + } + + NumMemCmpInlined++; + + Value *Res = Expansion.getMemCmpExpansion(); + + // Replace call with result of expansion and erase call. + CI->replaceAllUsesWith(Res); + CI->eraseFromParent(); + + return true; +} + + + +class ExpandMemCmpPass : public FunctionPass { +public: + static char ID; + + ExpandMemCmpPass() : FunctionPass(ID) { + initializeExpandMemCmpPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + if (skipFunction(F)) return false; + + auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); + if (!TPC) { + return false; + } + const TargetLowering* TL = + TPC->getTM<TargetMachine>().getSubtargetImpl(F)->getTargetLowering(); + + const TargetLibraryInfo *TLI = + &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + const TargetTransformInfo *TTI = + &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + auto PA = runImpl(F, TLI, TTI, TL); + return !PA.areAllPreserved(); + } + +private: + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); + FunctionPass::getAnalysisUsage(AU); + } + + PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI, + const TargetTransformInfo *TTI, + const TargetLowering* TL); + // Returns true if a change was made. 
+ bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI, + const TargetTransformInfo *TTI, const TargetLowering* TL, + const DataLayout& DL); +}; + +bool ExpandMemCmpPass::runOnBlock( + BasicBlock &BB, const TargetLibraryInfo *TLI, + const TargetTransformInfo *TTI, const TargetLowering* TL, + const DataLayout& DL) { + for (Instruction& I : BB) { + CallInst *CI = dyn_cast<CallInst>(&I); + if (!CI) { + continue; + } + LibFunc Func; + if (TLI->getLibFunc(ImmutableCallSite(CI), Func) && + Func == LibFunc_memcmp && expandMemCmp(CI, TTI, TL, &DL)) { + return true; + } + } + return false; +} + + +PreservedAnalyses ExpandMemCmpPass::runImpl( + Function &F, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, + const TargetLowering* TL) { + const DataLayout& DL = F.getParent()->getDataLayout(); + bool MadeChanges = false; + for (auto BBIt = F.begin(); BBIt != F.end();) { + if (runOnBlock(*BBIt, TLI, TTI, TL, DL)) { + MadeChanges = true; + // If changes were made, restart the function from the beginning, since + // the structure of the function was changed. + BBIt = F.begin(); + } else { + ++BBIt; + } + } + return MadeChanges ? 
PreservedAnalyses::none() : PreservedAnalyses::all(); +} + +} // namespace + +char ExpandMemCmpPass::ID = 0; +INITIALIZE_PASS_BEGIN(ExpandMemCmpPass, "expandmemcmp", + "Expand memcmp() to load/stores", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_END(ExpandMemCmpPass, "expandmemcmp", + "Expand memcmp() to load/stores", false, false) + +FunctionPass *llvm::createExpandMemCmpPass() { + return new ExpandMemCmpPass(); +} diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp index 4ce86f27a7dd..6ef97d6dd5ec 100644 --- a/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -17,11 +17,11 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -104,8 +104,8 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) { if (DstSubReg == InsReg) { // No need to insert an identity copy instruction. // Watch out for case like this: - // %RAX<def> = SUBREG_TO_REG 0, %EAX<kill>, 3 - // We must leave %RAX live. + // %rax = SUBREG_TO_REG 0, killed %eax, 3 + // We must leave %rax live. 
if (DstReg != InsReg) { MI->setDesc(TII->get(TargetOpcode::KILL)); MI->RemoveOperand(3); // SubIdx diff --git a/lib/CodeGen/ExpandReductions.cpp b/lib/CodeGen/ExpandReductions.cpp index 70dca3b74b2f..abf487a4f198 100644 --- a/lib/CodeGen/ExpandReductions.cpp +++ b/lib/CodeGen/ExpandReductions.cpp @@ -95,7 +95,7 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) { // and it can't be handled by generating this shuffle sequence. // TODO: Implement scalarization of ordered reductions here for targets // without native support. - if (!II->getFastMathFlags().unsafeAlgebra()) + if (!II->getFastMathFlags().isFast()) continue; Vec = II->getArgOperand(1); break; diff --git a/lib/CodeGen/FEntryInserter.cpp b/lib/CodeGen/FEntryInserter.cpp index 0759bf6713e0..4ddf9f92836c 100644 --- a/lib/CodeGen/FEntryInserter.cpp +++ b/lib/CodeGen/FEntryInserter.cpp @@ -15,11 +15,11 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -36,15 +36,13 @@ struct FEntryInserter : public MachineFunctionPass { bool FEntryInserter::runOnMachineFunction(MachineFunction &MF) { const std::string FEntryName = - MF.getFunction()->getFnAttribute("fentry-call").getValueAsString(); + MF.getFunction().getFnAttribute("fentry-call").getValueAsString(); if (FEntryName != "true") return false; auto &FirstMBB = *MF.begin(); - auto &FirstMI = *FirstMBB.begin(); - auto *TII = MF.getSubtarget().getInstrInfo(); - BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(), + BuildMI(FirstMBB, FirstMBB.begin(), DebugLoc(), TII->get(TargetOpcode::FENTRY_CALL)); return true; } diff --git 
a/lib/CodeGen/GCRootLowering.cpp b/lib/CodeGen/GCRootLowering.cpp index 35246545ca91..4361d8b248c8 100644 --- a/lib/CodeGen/GCRootLowering.cpp +++ b/lib/CodeGen/GCRootLowering.cpp @@ -18,17 +18,16 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -329,10 +328,10 @@ void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) { bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) { // Quick exit for functions that do not use GC. - if (!MF.getFunction()->hasGC()) + if (!MF.getFunction().hasGC()) return false; - FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF.getFunction()); + FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(MF.getFunction()); MMI = &getAnalysis<MachineModuleInfo>(); TII = MF.getSubtarget().getInstrInfo(); diff --git a/lib/CodeGen/GlobalISel/CMakeLists.txt b/lib/CodeGen/GlobalISel/CMakeLists.txt index eba7ea8132e3..2db90f8888cb 100644 --- a/lib/CodeGen/GlobalISel/CMakeLists.txt +++ b/lib/CodeGen/GlobalISel/CMakeLists.txt @@ -1,34 +1,18 @@ -# List of all GlobalISel files. 
-set(GLOBAL_ISEL_FILES - CallLowering.cpp - IRTranslator.cpp - InstructionSelect.cpp - InstructionSelector.cpp - MachineIRBuilder.cpp - LegalizerHelper.cpp - Legalizer.cpp - LegalizerInfo.cpp - Localizer.cpp - RegBankSelect.cpp - RegisterBank.cpp - RegisterBankInfo.cpp - Utils.cpp - ) - -# Add GlobalISel files to the dependencies if the user wants to build it. -if(LLVM_BUILD_GLOBAL_ISEL) - set(GLOBAL_ISEL_BUILD_FILES ${GLOBAL_ISEL_FILES}) -else() - set(GLOBAL_ISEL_BUILD_FILES"") - set(LLVM_OPTIONAL_SOURCES LLVMGlobalISel ${GLOBAL_ISEL_FILES}) -endif() - -# In LLVMBuild.txt files, it is not possible to mark a dependency to a -# library as optional. So instead, generate an empty library if we did -# not ask for it. add_llvm_library(LLVMGlobalISel - ${GLOBAL_ISEL_BUILD_FILES} + CallLowering.cpp GlobalISel.cpp + IRTranslator.cpp + InstructionSelect.cpp + InstructionSelector.cpp + LegalizerHelper.cpp + Legalizer.cpp + LegalizerInfo.cpp + Localizer.cpp + MachineIRBuilder.cpp + RegBankSelect.cpp + RegisterBank.cpp + RegisterBankInfo.cpp + Utils.cpp DEPENDS intrinsics_gen diff --git a/lib/CodeGen/GlobalISel/CallLowering.cpp b/lib/CodeGen/GlobalISel/CallLowering.cpp index be0c5c2bb70e..114c068749eb 100644 --- a/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -16,10 +16,10 @@ #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" -#include "llvm/Target/TargetLowering.h" using namespace llvm; @@ -108,7 +108,7 @@ bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder, ArrayRef<ArgInfo> Args, ValueHandler &Handler) const { MachineFunction &MF = MIRBuilder.getMF(); - const Function &F = *MF.getFunction(); + const Function &F = MF.getFunction(); const DataLayout &DL = F.getParent()->getDataLayout(); 
SmallVector<CCValAssign, 16> ArgLocs; @@ -160,10 +160,11 @@ unsigned CallLowering::ValueHandler::extendRegister(unsigned ValReg, // FIXME: bitconverting between vector types may or may not be a // nop in big-endian situations. return ValReg; - case CCValAssign::AExt: + case CCValAssign::AExt: { assert(!VA.getLocVT().isVector() && "unexpected vector extend"); - // Otherwise, it's a nop. - return ValReg; + auto MIB = MIRBuilder.buildAnyExt(LocTy, ValReg); + return MIB->getOperand(0).getReg(); + } case CCValAssign::SExt: { unsigned NewReg = MRI.createGenericVirtualRegister(LocTy); MIRBuilder.buildSExt(NewReg, ValReg); diff --git a/lib/CodeGen/GlobalISel/GlobalISel.cpp b/lib/CodeGen/GlobalISel/GlobalISel.cpp index 29d1209bb02a..00c6a9d63158 100644 --- a/lib/CodeGen/GlobalISel/GlobalISel.cpp +++ b/lib/CodeGen/GlobalISel/GlobalISel.cpp @@ -16,13 +16,6 @@ using namespace llvm; -#ifndef LLVM_BUILD_GLOBAL_ISEL - -void llvm::initializeGlobalISel(PassRegistry &Registry) { -} - -#else - void llvm::initializeGlobalISel(PassRegistry &Registry) { initializeIRTranslatorPass(Registry); initializeLegalizerPass(Registry); @@ -30,4 +23,3 @@ void llvm::initializeGlobalISel(PassRegistry &Registry) { initializeRegBankSelectPass(Registry); initializeInstructionSelectPass(Registry); } -#endif // LLVM_BUILD_GLOBAL_ISEL diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp index ed1bd995e60b..433f99b0113b 100644 --- a/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -15,7 +15,7 @@ #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/LowLevelType.h" @@ -26,7 +26,11 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" 
#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" @@ -54,12 +58,8 @@ #include "llvm/Support/LowLevelTypeImpl.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetIntrinsicInfo.h" -#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -124,8 +124,8 @@ unsigned IRTranslator::getOrCreateVReg(const Value &Val) { bool Success = translate(*CV, VReg); if (!Success) { OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", - MF->getFunction()->getSubprogram(), - &MF->getFunction()->getEntryBlock()); + MF->getFunction().getSubprogram(), + &MF->getFunction().getEntryBlock()); R << "unable to translate constant: " << ore::NV("Type", Val.getType()); reportTranslationError(*MF, *TPC, *ORE, R); return VReg; @@ -238,6 +238,8 @@ bool IRTranslator::translateCompare(const User &U, bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) { const ReturnInst &RI = cast<ReturnInst>(U); const Value *Ret = RI.getReturnValue(); + if (Ret && DL->getTypeStoreSize(Ret->getType()) == 0) + Ret = nullptr; // The target may mess up with the insertion point, but // this is not important as a return is the last instruction // of the block anyway. 
@@ -337,6 +339,9 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { : MachineMemOperand::MONone; Flags |= MachineMemOperand::MOLoad; + if (DL->getTypeStoreSize(LI.getType()) == 0) + return true; + unsigned Res = getOrCreateVReg(LI); unsigned Addr = getOrCreateVReg(*LI.getPointerOperand()); @@ -355,6 +360,9 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) { : MachineMemOperand::MONone; Flags |= MachineMemOperand::MOStore; + if (DL->getTypeStoreSize(SI.getValueOperand()->getType()) == 0) + return true; + unsigned Val = getOrCreateVReg(*SI.getValueOperand()); unsigned Addr = getOrCreateVReg(*SI.getPointerOperand()); @@ -583,7 +591,7 @@ void IRTranslator::getStackGuard(unsigned DstReg, MIB.addDef(DstReg); auto &TLI = *MF->getSubtarget().getTargetLowering(); - Value *Global = TLI.getSDagStackGuard(*MF->getFunction()->getParent()); + Value *Global = TLI.getSDagStackGuard(*MF->getFunction().getParent()); if (!Global) return; @@ -593,7 +601,7 @@ void IRTranslator::getStackGuard(unsigned DstReg, MachineMemOperand::MODereferenceable; *MemRefs = MF->getMachineMemOperand(MPInfo, Flags, DL->getPointerSizeInBits() / 8, - DL->getPointerABIAlignment()); + DL->getPointerABIAlignment(0)); MIB.setMemRefs(MemRefs, MemRefs + 1); } @@ -682,23 +690,16 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, if (!V) { // Currently the optimizer can produce this; insert an undef to // help debugging. Probably the optimizer should not do this. 
- MIRBuilder.buildIndirectDbgValue(0, DI.getOffset(), DI.getVariable(), - DI.getExpression()); + MIRBuilder.buildIndirectDbgValue(0, DI.getVariable(), DI.getExpression()); } else if (const auto *CI = dyn_cast<Constant>(V)) { - MIRBuilder.buildConstDbgValue(*CI, DI.getOffset(), DI.getVariable(), - DI.getExpression()); + MIRBuilder.buildConstDbgValue(*CI, DI.getVariable(), DI.getExpression()); } else { unsigned Reg = getOrCreateVReg(*V); // FIXME: This does not handle register-indirect values at offset 0. The // direct/indirect thing shouldn't really be handled by something as // implicit as reg+noreg vs reg+imm in the first palce, but it seems // pretty baked in right now. - if (DI.getOffset() != 0) - MIRBuilder.buildIndirectDbgValue(Reg, DI.getOffset(), DI.getVariable(), - DI.getExpression()); - else - MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(), - DI.getExpression()); + MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(), DI.getExpression()); } return true; } @@ -850,14 +851,10 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering(); TargetLowering::IntrinsicInfo Info; // TODO: Add a GlobalISel version of getTgtMemIntrinsic. - if (TLI.getTgtMemIntrinsic(Info, CI, ID)) { - MachineMemOperand::Flags Flags = - Info.vol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone; - Flags |= - Info.readMem ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore; - uint64_t Size = Info.memVT.getSizeInBits() >> 3; + if (TLI.getTgtMemIntrinsic(Info, CI, *MF, ID)) { + uint64_t Size = Info.memVT.getStoreSize(); MIB.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Info.ptrVal), - Flags, Size, Info.align)); + Info.flags, Size, Info.align)); } return true; @@ -928,7 +925,7 @@ bool IRTranslator::translateLandingPad(const User &U, // If there aren't registers to copy the values into (e.g., during SjLj // exceptions), then don't bother. 
auto &TLI = *MF->getSubtarget().getTargetLowering(); - const Constant *PersonalityFn = MF->getFunction()->getPersonalityFn(); + const Constant *PersonalityFn = MF->getFunction().getPersonalityFn(); if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 && TLI.getExceptionSelectorRegister(PersonalityFn) == 0) return true; @@ -1105,7 +1102,7 @@ bool IRTranslator::translateShuffleVector(const User &U, bool IRTranslator::translatePHI(const User &U, MachineIRBuilder &MIRBuilder) { const PHINode &PI = cast<PHINode>(U); - auto MIB = MIRBuilder.buildInstr(TargetOpcode::PHI); + auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI); MIB.addDef(getOrCreateVReg(PI)); PendingPHIs.emplace_back(&PI, MIB.getInstr()); @@ -1239,7 +1236,7 @@ void IRTranslator::finalizeFunction() { bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { MF = &CurMF; - const Function &F = *MF->getFunction(); + const Function &F = MF->getFunction(); if (F.empty()) return false; CLI = MF->getSubtarget().getCallLowering(); @@ -1252,6 +1249,14 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { assert(PendingPHIs.empty() && "stale PHIs"); + if (!DL->isLittleEndian()) { + // Currently we don't properly handle big endian code. + OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", + F.getSubprogram(), &F.getEntryBlock()); + R << "unable to translate in big endian mode"; + reportTranslationError(*MF, *TPC, *ORE, R); + } + // Release the per-function state when we return, whether we succeeded or not. auto FinalizeOnReturn = make_scope_exit([this]() { finalizeFunction(); }); @@ -1276,12 +1281,14 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { // Lower the actual args into this basic block. SmallVector<unsigned, 8> VRegArgs; - for (const Argument &Arg: F.args()) + for (const Argument &Arg: F.args()) { + if (DL->getTypeStoreSize(Arg.getType()) == 0) + continue; // Don't handle zero sized types. 
VRegArgs.push_back(getOrCreateVReg(Arg)); + } if (!CLI->lowerFormalArguments(EntryBuilder, F, VRegArgs)) { OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", - MF->getFunction()->getSubprogram(), - &MF->getFunction()->getEntryBlock()); + F.getSubprogram(), &F.getEntryBlock()); R << "unable to lower arguments: " << ore::NV("Prototype", F.getType()); reportTranslationError(*MF, *TPC, *ORE, R); return false; @@ -1298,14 +1305,18 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { if (translate(Inst)) continue; - std::string InstStrStorage; - raw_string_ostream InstStr(InstStrStorage); - InstStr << Inst; - OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", Inst.getDebugLoc(), &BB); - R << "unable to translate instruction: " << ore::NV("Opcode", &Inst) - << ": '" << InstStr.str() << "'"; + R << "unable to translate instruction: " << ore::NV("Opcode", &Inst); + + if (ORE->allowExtraAnalysis("gisel-irtranslator")) { + std::string InstStrStorage; + raw_string_ostream InstStr(InstStrStorage); + InstStr << Inst; + + R << ": '" << InstStr.str() << "'"; + } + reportTranslationError(*MF, *TPC, *ORE, R); return false; } diff --git a/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/lib/CodeGen/GlobalISel/InstructionSelect.cpp index a16e14fe2db6..422cc2219aa8 100644 --- a/lib/CodeGen/GlobalISel/InstructionSelect.cpp +++ b/lib/CodeGen/GlobalISel/InstructionSelect.cpp @@ -19,18 +19,29 @@ #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/config.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include 
"llvm/Support/TargetRegistry.h" #define DEBUG_TYPE "instruction-select" using namespace llvm; +#ifdef LLVM_GISEL_COV_PREFIX +static cl::opt<std::string> + CoveragePrefix("gisel-coverage-prefix", cl::init(LLVM_GISEL_COV_PREFIX), + cl::desc("Record GlobalISel rule coverage files of this " + "prefix if instrumentation was generated")); +#else +static const std::string CoveragePrefix = ""; +#endif + char InstructionSelect::ID = 0; INITIALIZE_PASS_BEGIN(InstructionSelect, DEBUG_TYPE, "Select target instructions out of generic instructions", @@ -66,6 +77,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>(); const InstructionSelector *ISel = MF.getSubtarget().getInstructionSelector(); + CodeGenCoverage CoverageInfo; assert(ISel && "Cannot work without InstructionSelector"); // An optimization remark emitter. Used to report failures. @@ -127,7 +139,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { continue; } - if (!ISel->select(MI)) { + if (!ISel->select(MI, CoverageInfo)) { // FIXME: It would be nice to dump all inserted instructions. It's // not obvious how, esp. considering select() can insert after MI. 
reportGISelFailure(MF, TPC, MORE, "gisel-select", "cannot select", MI); @@ -177,7 +189,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { if (MF.size() != NumBlocks) { MachineOptimizationRemarkMissed R("gisel-select", "GISelFailure", - MF.getFunction()->getSubprogram(), + MF.getFunction().getSubprogram(), /*MBB=*/nullptr); R << "inserting blocks is not supported yet"; reportGISelFailure(MF, TPC, MORE, R); @@ -187,6 +199,13 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { auto &TLI = *MF.getSubtarget().getTargetLowering(); TLI.finalizeLowering(MF); + CoverageInfo.emit(CoveragePrefix, + MF.getSubtarget() + .getTargetLowering() + ->getTargetMachine() + .getTarget() + .getBackendName()); + // FIXME: Should we accurately track changes? return true; } diff --git a/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/lib/CodeGen/GlobalISel/InstructionSelector.cpp index bf427225d6a9..88669bd68c00 100644 --- a/lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ b/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -6,8 +6,10 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// /// \file /// This file implements the InstructionSelector class. 
+// //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" @@ -16,14 +18,11 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/MC/MCInstrDesc.h" -#include "llvm/IR/Constants.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetRegisterInfo.h" #include <cassert> #define DEBUG_TYPE "instructionselector" @@ -31,7 +30,7 @@ using namespace llvm; InstructionSelector::MatcherState::MatcherState(unsigned MaxRenderers) - : Renderers(MaxRenderers, nullptr), MIs() {} + : Renderers(MaxRenderers), MIs() {} InstructionSelector::InstructionSelector() = default; @@ -100,7 +99,30 @@ bool InstructionSelector::isOperandImmEqual( return false; } -bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI) const { +bool InstructionSelector::isBaseWithConstantOffset( + const MachineOperand &Root, const MachineRegisterInfo &MRI) const { + if (!Root.isReg()) + return false; + + MachineInstr *RootI = MRI.getVRegDef(Root.getReg()); + if (RootI->getOpcode() != TargetOpcode::G_GEP) + return false; + + MachineOperand &RHS = RootI->getOperand(2); + MachineInstr *RHSI = MRI.getVRegDef(RHS.getReg()); + if (RHSI->getOpcode() != TargetOpcode::G_CONSTANT) + return false; + + return true; +} + +bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI, + MachineInstr &IntoMI) const { + // Immediate neighbours are already folded. 
+ if (MI.getParent() == IntoMI.getParent() && + std::next(MI.getIterator()) == IntoMI.getIterator()) + return true; + return !MI.mayLoadOrStore() && !MI.hasUnmodeledSideEffects() && MI.implicit_operands().begin() == MI.implicit_operands().end(); } diff --git a/lib/CodeGen/GlobalISel/Legalizer.cpp b/lib/CodeGen/GlobalISel/Legalizer.cpp index b699156c568b..f09b0d9f11e7 100644 --- a/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -14,14 +14,17 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/Legalizer.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/CodeGen/GlobalISel/GISelWorkList.h" +#include "llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h" #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Support/Debug.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <iterator> @@ -50,79 +53,18 @@ void Legalizer::getAnalysisUsage(AnalysisUsage &AU) const { void Legalizer::init(MachineFunction &MF) { } -bool Legalizer::combineMerges(MachineInstr &MI, MachineRegisterInfo &MRI, - const TargetInstrInfo &TII, - MachineIRBuilder &MIRBuilder) { - if (MI.getOpcode() != TargetOpcode::G_UNMERGE_VALUES) +static bool isArtifact(const MachineInstr &MI) { + switch (MI.getOpcode()) { + default: return false; - - unsigned NumDefs = MI.getNumOperands() - 1; - unsigned SrcReg = MI.getOperand(NumDefs).getReg(); - MachineInstr &MergeI = *MRI.def_instr_begin(SrcReg); - if (MergeI.getOpcode() != TargetOpcode::G_MERGE_VALUES) - return false; - - const unsigned NumMergeRegs = MergeI.getNumOperands() - 1; - - if (NumMergeRegs < NumDefs) { - if (NumDefs 
% NumMergeRegs != 0) - return false; - - MIRBuilder.setInstr(MI); - // Transform to UNMERGEs, for example - // %1 = G_MERGE_VALUES %4, %5 - // %9, %10, %11, %12 = G_UNMERGE_VALUES %1 - // to - // %9, %10 = G_UNMERGE_VALUES %4 - // %11, %12 = G_UNMERGE_VALUES %5 - - const unsigned NewNumDefs = NumDefs / NumMergeRegs; - for (unsigned Idx = 0; Idx < NumMergeRegs; ++Idx) { - SmallVector<unsigned, 2> DstRegs; - for (unsigned j = 0, DefIdx = Idx * NewNumDefs; j < NewNumDefs; - ++j, ++DefIdx) - DstRegs.push_back(MI.getOperand(DefIdx).getReg()); - - MIRBuilder.buildUnmerge(DstRegs, MergeI.getOperand(Idx + 1).getReg()); - } - - } else if (NumMergeRegs > NumDefs) { - if (NumMergeRegs % NumDefs != 0) - return false; - - MIRBuilder.setInstr(MI); - // Transform to MERGEs - // %6 = G_MERGE_VALUES %17, %18, %19, %20 - // %7, %8 = G_UNMERGE_VALUES %6 - // to - // %7 = G_MERGE_VALUES %17, %18 - // %8 = G_MERGE_VALUES %19, %20 - - const unsigned NumRegs = NumMergeRegs / NumDefs; - for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) { - SmallVector<unsigned, 2> Regs; - for (unsigned j = 0, Idx = NumRegs * DefIdx + 1; j < NumRegs; ++j, ++Idx) - Regs.push_back(MergeI.getOperand(Idx).getReg()); - - MIRBuilder.buildMerge(MI.getOperand(DefIdx).getReg(), Regs); - } - - } else { - // FIXME: is a COPY appropriate if the types mismatch? We know both - // registers are allocatable by now. 
- if (MRI.getType(MI.getOperand(0).getReg()) != - MRI.getType(MergeI.getOperand(1).getReg())) - return false; - - for (unsigned Idx = 0; Idx < NumDefs; ++Idx) - MRI.replaceRegWith(MI.getOperand(Idx).getReg(), - MergeI.getOperand(Idx + 1).getReg()); + case TargetOpcode::G_TRUNC: + case TargetOpcode::G_ZEXT: + case TargetOpcode::G_ANYEXT: + case TargetOpcode::G_SEXT: + case TargetOpcode::G_MERGE_VALUES: + case TargetOpcode::G_UNMERGE_VALUES: + return true; } - - MI.eraseFromParent(); - if (MRI.use_empty(MergeI.getOperand(0).getReg())) - MergeI.eraseFromParent(); - return true; } bool Legalizer::runOnMachineFunction(MachineFunction &MF) { @@ -136,79 +78,108 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr); LegalizerHelper Helper(MF); - // FIXME: an instruction may need more than one pass before it is legal. For - // example on most architectures <3 x i3> is doubly-illegal. It would - // typically proceed along a path like: <3 x i3> -> <3 x i8> -> <8 x i8>. We - // probably want a worklist of instructions rather than naive iterate until - // convergence for performance reasons. - bool Changed = false; - MachineBasicBlock::iterator NextMI; - for (auto &MBB : MF) { - for (auto MI = MBB.begin(); MI != MBB.end(); MI = NextMI) { - // Get the next Instruction before we try to legalize, because there's a - // good chance MI will be deleted. - NextMI = std::next(MI); + const size_t NumBlocks = MF.size(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + // Populate Insts + GISelWorkList<256> InstList; + GISelWorkList<128> ArtifactList; + ReversePostOrderTraversal<MachineFunction *> RPOT(&MF); + // Perform legalization bottom up so we can DCE as we legalize. + // Traverse BB in RPOT and within each basic block, add insts top down, + // so when we pop_back_val in the legalization process, we traverse bottom-up. 
+ for (auto *MBB : RPOT) { + if (MBB->empty()) + continue; + for (MachineInstr &MI : *MBB) { // Only legalize pre-isel generic instructions: others don't have types // and are assumed to be legal. - if (!isPreISelGenericOpcode(MI->getOpcode())) + if (!isPreISelGenericOpcode(MI.getOpcode())) continue; - unsigned NumNewInsns = 0; - SmallVector<MachineInstr *, 4> WorkList; - Helper.MIRBuilder.recordInsertions([&](MachineInstr *MI) { - // Only legalize pre-isel generic instructions. - // Legalization process could generate Target specific pseudo - // instructions with generic types. Don't record them - if (isPreISelGenericOpcode(MI->getOpcode())) { - ++NumNewInsns; - WorkList.push_back(MI); - } - }); - WorkList.push_back(&*MI); - - bool Changed = false; - LegalizerHelper::LegalizeResult Res; - unsigned Idx = 0; - do { - Res = Helper.legalizeInstrStep(*WorkList[Idx]); - // Error out if we couldn't legalize this instruction. We may want to - // fall back to DAG ISel instead in the future. - if (Res == LegalizerHelper::UnableToLegalize) { - Helper.MIRBuilder.stopRecordingInsertions(); - if (Res == LegalizerHelper::UnableToLegalize) { - reportGISelFailure(MF, TPC, MORE, "gisel-legalize", - "unable to legalize instruction", - *WorkList[Idx]); - return false; - } - } - Changed |= Res == LegalizerHelper::Legalized; - ++Idx; - -#ifndef NDEBUG - if (NumNewInsns) { - DEBUG(dbgs() << ".. .. Emitted " << NumNewInsns << " insns\n"); - for (auto I = WorkList.end() - NumNewInsns, E = WorkList.end(); - I != E; ++I) - DEBUG(dbgs() << ".. .. 
New MI: "; (*I)->print(dbgs())); - NumNewInsns = 0; - } -#endif - } while (Idx < WorkList.size()); - - Helper.MIRBuilder.stopRecordingInsertions(); + if (isArtifact(MI)) + ArtifactList.insert(&MI); + else + InstList.insert(&MI); } } - - MachineRegisterInfo &MRI = MF.getRegInfo(); - const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); - for (auto &MBB : MF) { - for (auto MI = MBB.begin(); MI != MBB.end(); MI = NextMI) { - // Get the next Instruction before we try to legalize, because there's a - // good chance MI will be deleted. - NextMI = std::next(MI); - Changed |= combineMerges(*MI, MRI, TII, Helper.MIRBuilder); + Helper.MIRBuilder.recordInsertions([&](MachineInstr *MI) { + // Only legalize pre-isel generic instructions. + // Legalization process could generate Target specific pseudo + // instructions with generic types. Don't record them + if (isPreISelGenericOpcode(MI->getOpcode())) { + if (isArtifact(*MI)) + ArtifactList.insert(MI); + else + InstList.insert(MI); } + DEBUG(dbgs() << ".. .. New MI: " << *MI;); + }); + const LegalizerInfo &LInfo(Helper.getLegalizerInfo()); + LegalizationArtifactCombiner ArtCombiner(Helper.MIRBuilder, MF.getRegInfo(), LInfo); + auto RemoveDeadInstFromLists = [&InstList, + &ArtifactList](MachineInstr *DeadMI) { + InstList.remove(DeadMI); + ArtifactList.remove(DeadMI); + }; + bool Changed = false; + do { + while (!InstList.empty()) { + MachineInstr &MI = *InstList.pop_back_val(); + assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode"); + if (isTriviallyDead(MI, MRI)) { + DEBUG(dbgs() << MI << "Is dead; erasing.\n"); + MI.eraseFromParentAndMarkDBGValuesForRemoval(); + continue; + } + + // Do the legalization for this instruction. + auto Res = Helper.legalizeInstrStep(MI); + // Error out if we couldn't legalize this instruction. We may want to + // fall back to DAG ISel instead in the future. 
+ if (Res == LegalizerHelper::UnableToLegalize) { + Helper.MIRBuilder.stopRecordingInsertions(); + reportGISelFailure(MF, TPC, MORE, "gisel-legalize", + "unable to legalize instruction", MI); + return false; + } + Changed |= Res == LegalizerHelper::Legalized; + } + while (!ArtifactList.empty()) { + MachineInstr &MI = *ArtifactList.pop_back_val(); + assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode"); + if (isTriviallyDead(MI, MRI)) { + DEBUG(dbgs() << MI << "Is dead; erasing.\n"); + RemoveDeadInstFromLists(&MI); + MI.eraseFromParentAndMarkDBGValuesForRemoval(); + continue; + } + SmallVector<MachineInstr *, 4> DeadInstructions; + if (ArtCombiner.tryCombineInstruction(MI, DeadInstructions)) { + for (auto *DeadMI : DeadInstructions) { + DEBUG(dbgs() << ".. Erasing Dead Instruction " << *DeadMI); + RemoveDeadInstFromLists(DeadMI); + DeadMI->eraseFromParentAndMarkDBGValuesForRemoval(); + } + Changed = true; + continue; + } + // If this was not an artifact (that could be combined away), this might + // need special handling. Add it to InstList, so when it's processed + // there, it has to be legal or specially handled. + else + InstList.insert(&MI); + } + } while (!InstList.empty()); + + // For now don't support if new blocks are inserted - we would need to fix the + // outerloop for that. 
+ if (MF.size() != NumBlocks) { + MachineOptimizationRemarkMissed R("gisel-legalize", "GISelFailure", + MF.getFunction().getSubprogram(), + /*MBB=*/nullptr); + R << "inserting blocks is not supported yet"; + reportGISelFailure(MF, TPC, MORE, R); + return false; } return Changed; diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 5258370e6680..87a658be4c29 100644 --- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -17,12 +17,11 @@ #include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetSubtargetInfo.h" -#include <sstream> #define DEBUG_TYPE "legalizer" @@ -91,6 +90,15 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { case TargetOpcode::G_FADD: assert((Size == 32 || Size == 64) && "Unsupported size"); return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32; + case TargetOpcode::G_FSUB: + assert((Size == 32 || Size == 64) && "Unsupported size"); + return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32; + case TargetOpcode::G_FMUL: + assert((Size == 32 || Size == 64) && "Unsupported size"); + return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32; + case TargetOpcode::G_FDIV: + assert((Size == 32 || Size == 64) && "Unsupported size"); + return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32; case TargetOpcode::G_FREM: return Size == 64 ? 
RTLIB::REM_F64 : RTLIB::REM_F32; case TargetOpcode::G_FPOW: @@ -128,7 +136,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::libcall(MachineInstr &MI) { LLT LLTy = MRI.getType(MI.getOperand(0).getReg()); unsigned Size = LLTy.getSizeInBits(); - auto &Ctx = MIRBuilder.getMF().getFunction()->getContext(); + auto &Ctx = MIRBuilder.getMF().getFunction().getContext(); MIRBuilder.setInstr(MI); @@ -146,6 +154,9 @@ LegalizerHelper::libcall(MachineInstr &MI) { break; } case TargetOpcode::G_FADD: + case TargetOpcode::G_FSUB: + case TargetOpcode::G_FMUL: + case TargetOpcode::G_FDIV: case TargetOpcode::G_FPOW: case TargetOpcode::G_FREM: { Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx); @@ -169,12 +180,18 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, MIRBuilder.setInstr(MI); + int64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); + int64_t NarrowSize = NarrowTy.getSizeInBits(); + switch (MI.getOpcode()) { default: return UnableToLegalize; case TargetOpcode::G_IMPLICIT_DEF: { - int NumParts = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / - NarrowTy.getSizeInBits(); + // FIXME: add support for when SizeOp0 isn't an exact multiple of + // NarrowSize. + if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; + int NumParts = SizeOp0 / NarrowSize; SmallVector<unsigned, 2> DstRegs; for (int i = 0; i < NumParts; ++i) { @@ -187,9 +204,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, return Legalized; } case TargetOpcode::G_ADD: { + // FIXME: add support for when SizeOp0 isn't an exact multiple of + // NarrowSize. + if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; // Expand in terms of carry-setting/consuming G_ADDE instructions. 
- int NumParts = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / - NarrowTy.getSizeInBits(); + int NumParts = SizeOp0 / NarrowTy.getSizeInBits(); SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs; extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs); @@ -217,9 +237,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, if (TypeIdx != 1) return UnableToLegalize; - int64_t NarrowSize = NarrowTy.getSizeInBits(); - int NumParts = - MRI.getType(MI.getOperand(1).getReg()).getSizeInBits() / NarrowSize; + int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); + // FIXME: add support for when SizeOp1 isn't an exact multiple of + // NarrowSize. + if (SizeOp1 % NarrowSize != 0) + return UnableToLegalize; + int NumParts = SizeOp1 / NarrowSize; SmallVector<unsigned, 2> SrcRegs, DstRegs; SmallVector<uint64_t, 2> Indexes; @@ -266,12 +289,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, return Legalized; } case TargetOpcode::G_INSERT: { - if (TypeIdx != 0) + // FIXME: add support for when SizeOp0 isn't an exact multiple of + // NarrowSize. + if (SizeOp0 % NarrowSize != 0) return UnableToLegalize; - int64_t NarrowSize = NarrowTy.getSizeInBits(); - int NumParts = - MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize; + int NumParts = SizeOp0 / NarrowSize; SmallVector<unsigned, 2> SrcRegs, DstRegs; SmallVector<uint64_t, 2> Indexes; @@ -326,9 +349,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, return Legalized; } case TargetOpcode::G_LOAD: { - unsigned NarrowSize = NarrowTy.getSizeInBits(); - int NumParts = - MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize; + // FIXME: add support for when SizeOp0 isn't an exact multiple of + // NarrowSize. 
+ if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; + int NumParts = SizeOp0 / NarrowSize; LLT OffsetTy = LLT::scalar( MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits()); @@ -353,9 +378,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, return Legalized; } case TargetOpcode::G_STORE: { - unsigned NarrowSize = NarrowTy.getSizeInBits(); - int NumParts = - MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize; + // FIXME: add support for when SizeOp0 isn't an exact multiple of + // NarrowSize. + if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; + int NumParts = SizeOp0 / NarrowSize; LLT OffsetTy = LLT::scalar( MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits()); @@ -377,11 +404,13 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, return Legalized; } case TargetOpcode::G_CONSTANT: { - unsigned NarrowSize = NarrowTy.getSizeInBits(); - int NumParts = - MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize; + // FIXME: add support for when SizeOp0 isn't an exact multiple of + // NarrowSize. + if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; + int NumParts = SizeOp0 / NarrowSize; const APInt &Cst = MI.getOperand(1).getCImm()->getValue(); - LLVMContext &Ctx = MIRBuilder.getMF().getFunction()->getContext(); + LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); SmallVector<unsigned, 2> DstRegs; for (int i = 0; i < NumParts; ++i) { @@ -396,6 +425,53 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_OR: { + // Legalize bitwise operation: + // A = BinOp<Ty> B, C + // into: + // B1, ..., BN = G_UNMERGE_VALUES B + // C1, ..., CN = G_UNMERGE_VALUES C + // A1 = BinOp<Ty/N> B1, C2 + // ... 
+ // AN = BinOp<Ty/N> BN, CN + // A = G_MERGE_VALUES A1, ..., AN + + // FIXME: add support for when SizeOp0 isn't an exact multiple of + // NarrowSize. + if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; + int NumParts = SizeOp0 / NarrowSize; + + // List the registers where the destination will be scattered. + SmallVector<unsigned, 2> DstRegs; + // List the registers where the first argument will be split. + SmallVector<unsigned, 2> SrcsReg1; + // List the registers where the second argument will be split. + SmallVector<unsigned, 2> SrcsReg2; + // Create all the temporary registers. + for (int i = 0; i < NumParts; ++i) { + unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); + unsigned SrcReg1 = MRI.createGenericVirtualRegister(NarrowTy); + unsigned SrcReg2 = MRI.createGenericVirtualRegister(NarrowTy); + + DstRegs.push_back(DstReg); + SrcsReg1.push_back(SrcReg1); + SrcsReg2.push_back(SrcReg2); + } + // Explode the big arguments into smaller chunks. + MIRBuilder.buildUnmerge(SrcsReg1, MI.getOperand(1).getReg()); + MIRBuilder.buildUnmerge(SrcsReg2, MI.getOperand(2).getReg()); + + // Do the operation on each small part. + for (int i = 0; i < NumParts; ++i) + MIRBuilder.buildOr(DstRegs[i], SrcsReg1[i], SrcsReg2[i]); + + // Gather the destination registers into the final destination. 
+ unsigned DstReg = MI.getOperand(0).getReg(); + MIRBuilder.buildMerge(DstReg, DstRegs); + MI.eraseFromParent(); + return Legalized; + } } } @@ -597,22 +673,58 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_FCMP: { + unsigned Op0Ext, Op1Ext, DstReg; + unsigned Cmp1 = MI.getOperand(2).getReg(); + unsigned Cmp2 = MI.getOperand(3).getReg(); + if (TypeIdx == 0) { + Op0Ext = Cmp1; + Op1Ext = Cmp2; + DstReg = MRI.createGenericVirtualRegister(WideTy); + } else { + Op0Ext = MRI.createGenericVirtualRegister(WideTy); + Op1Ext = MRI.createGenericVirtualRegister(WideTy); + DstReg = MI.getOperand(0).getReg(); + MIRBuilder.buildInstr(TargetOpcode::G_FPEXT, Op0Ext, Cmp1); + MIRBuilder.buildInstr(TargetOpcode::G_FPEXT, Op1Ext, Cmp2); + } + MIRBuilder.buildFCmp( + static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()), + DstReg, Op0Ext, Op1Ext); + if (TypeIdx == 0) + MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, MI.getOperand(0).getReg(), + DstReg); + MI.eraseFromParent(); + return Legalized; + } case TargetOpcode::G_ICMP: { - assert(TypeIdx == 1 && "unable to legalize predicate"); bool IsSigned = CmpInst::isSigned( static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate())); - unsigned Op0Ext = MRI.createGenericVirtualRegister(WideTy); - unsigned Op1Ext = MRI.createGenericVirtualRegister(WideTy); - if (IsSigned) { - MIRBuilder.buildSExt(Op0Ext, MI.getOperand(2).getReg()); - MIRBuilder.buildSExt(Op1Ext, MI.getOperand(3).getReg()); + unsigned Cmp1 = MI.getOperand(2).getReg(); + unsigned Cmp2 = MI.getOperand(3).getReg(); + unsigned Op0Ext, Op1Ext, DstReg; + if (TypeIdx == 0) { + Op0Ext = Cmp1; + Op1Ext = Cmp2; + DstReg = MRI.createGenericVirtualRegister(WideTy); } else { - MIRBuilder.buildZExt(Op0Ext, MI.getOperand(2).getReg()); - MIRBuilder.buildZExt(Op1Ext, MI.getOperand(3).getReg()); + Op0Ext = MRI.createGenericVirtualRegister(WideTy); + Op1Ext = 
MRI.createGenericVirtualRegister(WideTy); + DstReg = MI.getOperand(0).getReg(); + if (IsSigned) { + MIRBuilder.buildSExt(Op0Ext, Cmp1); + MIRBuilder.buildSExt(Op1Ext, Cmp2); + } else { + MIRBuilder.buildZExt(Op0Ext, Cmp1); + MIRBuilder.buildZExt(Op1Ext, Cmp2); + } } MIRBuilder.buildICmp( static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()), - MI.getOperand(0).getReg(), Op0Ext, Op1Ext); + DstReg, Op0Ext, Op1Ext); + if (TypeIdx == 0) + MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, MI.getOperand(0).getReg(), + DstReg); MI.eraseFromParent(); return Legalized; } @@ -623,6 +735,35 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { MI.getOperand(2).setReg(OffsetExt); return Legalized; } + case TargetOpcode::G_PHI: { + assert(TypeIdx == 0 && "Expecting only Idx 0"); + auto getExtendedReg = [&](unsigned Reg, MachineBasicBlock &MBB) { + auto FirstTermIt = MBB.getFirstTerminator(); + MIRBuilder.setInsertPt(MBB, FirstTermIt); + MachineInstr *DefMI = MRI.getVRegDef(Reg); + MachineInstrBuilder MIB; + if (DefMI->getOpcode() == TargetOpcode::G_TRUNC) + MIB = MIRBuilder.buildAnyExtOrTrunc(WideTy, + DefMI->getOperand(1).getReg()); + else + MIB = MIRBuilder.buildAnyExt(WideTy, Reg); + return MIB->getOperand(0).getReg(); + }; + auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI, WideTy); + for (auto OpIt = MI.operands_begin() + 1, OpE = MI.operands_end(); + OpIt != OpE;) { + unsigned Reg = OpIt++->getReg(); + MachineBasicBlock *OpMBB = OpIt++->getMBB(); + MIB.addReg(getExtendedReg(Reg, *OpMBB)); + MIB.addMBB(OpMBB); + } + auto *MBB = MI.getParent(); + MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI()); + MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), + MIB->getOperand(0).getReg()); + MI.eraseFromParent(); + return Legalized; + } } } @@ -683,7 +824,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return UnableToLegalize; unsigned Res = MI.getOperand(0).getReg(); Type *ZeroTy; - LLVMContext &Ctx = 
MIRBuilder.getMF().getFunction()->getContext(); + LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); switch (Ty.getSizeInBits()) { case 16: ZeroTy = Type::getHalfTy(Ctx); @@ -726,6 +867,18 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: { + unsigned OldValRes = MI.getOperand(0).getReg(); + unsigned SuccessRes = MI.getOperand(1).getReg(); + unsigned Addr = MI.getOperand(2).getReg(); + unsigned CmpVal = MI.getOperand(3).getReg(); + unsigned NewVal = MI.getOperand(4).getReg(); + MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal, + **MI.memoperands_begin()); + MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal); + MI.eraseFromParent(); + return Legalized; + } } } @@ -741,7 +894,12 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case TargetOpcode::G_ADD: { unsigned NarrowSize = NarrowTy.getSizeInBits(); unsigned DstReg = MI.getOperand(0).getReg(); - int NumParts = MRI.getType(DstReg).getSizeInBits() / NarrowSize; + unsigned Size = MRI.getType(DstReg).getSizeInBits(); + int NumParts = Size / NarrowSize; + // FIXME: Don't know how to handle the situation where the small vectors + // aren't all the same size yet. 
+ if (Size % NarrowSize != 0) + return UnableToLegalize; MIRBuilder.setInstr(MI); diff --git a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp index 76917aa9660d..9c27c59a0654 100644 --- a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -22,51 +22,136 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LowLevelTypeImpl.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetOpcodes.h" #include <algorithm> -#include <cassert> -#include <tuple> -#include <utility> - +#include <map> using namespace llvm; -LegalizerInfo::LegalizerInfo() { - DefaultActions[TargetOpcode::G_IMPLICIT_DEF] = NarrowScalar; - - // FIXME: these two can be legalized to the fundamental load/store Jakob - // proposed. Once loads & stores are supported. - DefaultActions[TargetOpcode::G_ANYEXT] = Legal; - DefaultActions[TargetOpcode::G_TRUNC] = Legal; +LegalizerInfo::LegalizerInfo() : TablesInitialized(false) { + // Set defaults. + // FIXME: these two (G_ANYEXT and G_TRUNC?) can be legalized to the + // fundamental load/store Jakob proposed. Once loads & stores are supported. 
+ setScalarAction(TargetOpcode::G_ANYEXT, 1, {{1, Legal}}); + setScalarAction(TargetOpcode::G_ZEXT, 1, {{1, Legal}}); + setScalarAction(TargetOpcode::G_SEXT, 1, {{1, Legal}}); + setScalarAction(TargetOpcode::G_TRUNC, 0, {{1, Legal}}); + setScalarAction(TargetOpcode::G_TRUNC, 1, {{1, Legal}}); - DefaultActions[TargetOpcode::G_INTRINSIC] = Legal; - DefaultActions[TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS] = Legal; + setScalarAction(TargetOpcode::G_INTRINSIC, 0, {{1, Legal}}); + setScalarAction(TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS, 0, {{1, Legal}}); - DefaultActions[TargetOpcode::G_ADD] = NarrowScalar; - DefaultActions[TargetOpcode::G_LOAD] = NarrowScalar; - DefaultActions[TargetOpcode::G_STORE] = NarrowScalar; + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_IMPLICIT_DEF, 0, narrowToSmallerAndUnsupportedIfTooSmall); + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_ADD, 0, widenToLargerTypesAndNarrowToLargest); + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_OR, 0, widenToLargerTypesAndNarrowToLargest); + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_LOAD, 0, narrowToSmallerAndUnsupportedIfTooSmall); + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_STORE, 0, narrowToSmallerAndUnsupportedIfTooSmall); - DefaultActions[TargetOpcode::G_BRCOND] = WidenScalar; - DefaultActions[TargetOpcode::G_INSERT] = NarrowScalar; - DefaultActions[TargetOpcode::G_EXTRACT] = NarrowScalar; - DefaultActions[TargetOpcode::G_FNEG] = Lower; + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_BRCOND, 0, widenToLargerTypesUnsupportedOtherwise); + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_INSERT, 0, narrowToSmallerAndUnsupportedIfTooSmall); + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_EXTRACT, 0, narrowToSmallerAndUnsupportedIfTooSmall); + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_EXTRACT, 1, narrowToSmallerAndUnsupportedIfTooSmall); + 
setScalarAction(TargetOpcode::G_FNEG, 0, {{1, Lower}}); } void LegalizerInfo::computeTables() { - for (unsigned Opcode = 0; Opcode <= LastOp - FirstOp; ++Opcode) { - for (unsigned Idx = 0; Idx != Actions[Opcode].size(); ++Idx) { - for (auto &Action : Actions[Opcode][Idx]) { - LLT Ty = Action.first; - if (!Ty.isVector()) - continue; - - auto &Entry = MaxLegalVectorElts[std::make_pair(Opcode + FirstOp, - Ty.getElementType())]; - Entry = std::max(Entry, Ty.getNumElements()); + assert(TablesInitialized == false); + + for (unsigned OpcodeIdx = 0; OpcodeIdx <= LastOp - FirstOp; ++OpcodeIdx) { + const unsigned Opcode = FirstOp + OpcodeIdx; + for (unsigned TypeIdx = 0; TypeIdx != SpecifiedActions[OpcodeIdx].size(); + ++TypeIdx) { + // 0. Collect information specified through the setAction API, i.e. + // for specific bit sizes. + // For scalar types: + SizeAndActionsVec ScalarSpecifiedActions; + // For pointer types: + std::map<uint16_t, SizeAndActionsVec> AddressSpace2SpecifiedActions; + // For vector types: + std::map<uint16_t, SizeAndActionsVec> ElemSize2SpecifiedActions; + for (auto LLT2Action : SpecifiedActions[OpcodeIdx][TypeIdx]) { + const LLT Type = LLT2Action.first; + const LegalizeAction Action = LLT2Action.second; + + auto SizeAction = std::make_pair(Type.getSizeInBits(), Action); + if (Type.isPointer()) + AddressSpace2SpecifiedActions[Type.getAddressSpace()].push_back( + SizeAction); + else if (Type.isVector()) + ElemSize2SpecifiedActions[Type.getElementType().getSizeInBits()] + .push_back(SizeAction); + else + ScalarSpecifiedActions.push_back(SizeAction); + } + + // 1. Handle scalar types + { + // Decide how to handle bit sizes for which no explicit specification + // was given. 
+ SizeChangeStrategy S = &unsupportedForDifferentSizes; + if (TypeIdx < ScalarSizeChangeStrategies[OpcodeIdx].size() && + ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx] != nullptr) + S = ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx]; + std::sort(ScalarSpecifiedActions.begin(), ScalarSpecifiedActions.end()); + checkPartialSizeAndActionsVector(ScalarSpecifiedActions); + setScalarAction(Opcode, TypeIdx, S(ScalarSpecifiedActions)); } + + // 2. Handle pointer types + for (auto PointerSpecifiedActions : AddressSpace2SpecifiedActions) { + std::sort(PointerSpecifiedActions.second.begin(), + PointerSpecifiedActions.second.end()); + checkPartialSizeAndActionsVector(PointerSpecifiedActions.second); + // For pointer types, we assume that there isn't a meaningfull way + // to change the number of bits used in the pointer. + setPointerAction( + Opcode, TypeIdx, PointerSpecifiedActions.first, + unsupportedForDifferentSizes(PointerSpecifiedActions.second)); + } + + // 3. Handle vector types + SizeAndActionsVec ElementSizesSeen; + for (auto VectorSpecifiedActions : ElemSize2SpecifiedActions) { + std::sort(VectorSpecifiedActions.second.begin(), + VectorSpecifiedActions.second.end()); + const uint16_t ElementSize = VectorSpecifiedActions.first; + ElementSizesSeen.push_back({ElementSize, Legal}); + checkPartialSizeAndActionsVector(VectorSpecifiedActions.second); + // For vector types, we assume that the best way to adapt the number + // of elements is to the next larger number of elements type for which + // the vector type is legal, unless there is no such type. In that case, + // legalize towards a vector type with a smaller number of elements. 
+ SizeAndActionsVec NumElementsActions; + for (SizeAndAction BitsizeAndAction : VectorSpecifiedActions.second) { + assert(BitsizeAndAction.first % ElementSize == 0); + const uint16_t NumElements = BitsizeAndAction.first / ElementSize; + NumElementsActions.push_back({NumElements, BitsizeAndAction.second}); + } + setVectorNumElementAction( + Opcode, TypeIdx, ElementSize, + moreToWiderTypesAndLessToWidest(NumElementsActions)); + } + std::sort(ElementSizesSeen.begin(), ElementSizesSeen.end()); + SizeChangeStrategy VectorElementSizeChangeStrategy = + &unsupportedForDifferentSizes; + if (TypeIdx < VectorElementSizeChangeStrategies[OpcodeIdx].size() && + VectorElementSizeChangeStrategies[OpcodeIdx][TypeIdx] != nullptr) + VectorElementSizeChangeStrategy = + VectorElementSizeChangeStrategies[OpcodeIdx][TypeIdx]; + setScalarInVectorAction( + Opcode, TypeIdx, VectorElementSizeChangeStrategy(ElementSizesSeen)); } } @@ -82,62 +167,23 @@ LegalizerInfo::getAction(const InstrAspect &Aspect) const { assert(TablesInitialized && "backend forgot to call computeTables"); // These *have* to be implemented for now, they're the fundamental basis of // how everything else is transformed. + if (Aspect.Type.isScalar() || Aspect.Type.isPointer()) + return findScalarLegalAction(Aspect); + assert(Aspect.Type.isVector()); + return findVectorLegalAction(Aspect); +} - // FIXME: the long-term plan calls for expansion in terms of load/store (if - // they're not legal). - if (Aspect.Opcode == TargetOpcode::G_MERGE_VALUES || - Aspect.Opcode == TargetOpcode::G_UNMERGE_VALUES) - return std::make_pair(Legal, Aspect.Type); - - LLT Ty = Aspect.Type; - LegalizeAction Action = findInActions(Aspect); - // LegalizerHelper is not able to handle non-power-of-2 types right now, so do - // not try to legalize them unless they are marked as Legal or Custom. - // FIXME: This is a temporary hack until the general non-power-of-2 - // legalization works. 
- if (!isPowerOf2_64(Ty.getSizeInBits()) && - !(Action == Legal || Action == Custom)) - return std::make_pair(Unsupported, LLT()); - - if (Action != NotFound) - return findLegalAction(Aspect, Action); - - unsigned Opcode = Aspect.Opcode; - if (!Ty.isVector()) { - auto DefaultAction = DefaultActions.find(Aspect.Opcode); - if (DefaultAction != DefaultActions.end() && DefaultAction->second == Legal) - return std::make_pair(Legal, Ty); - - if (DefaultAction != DefaultActions.end() && DefaultAction->second == Lower) - return std::make_pair(Lower, Ty); - - if (DefaultAction == DefaultActions.end() || - DefaultAction->second != NarrowScalar) - return std::make_pair(Unsupported, LLT()); - return findLegalAction(Aspect, NarrowScalar); - } - - LLT EltTy = Ty.getElementType(); - int NumElts = Ty.getNumElements(); - - auto ScalarAction = ScalarInVectorActions.find(std::make_pair(Opcode, EltTy)); - if (ScalarAction != ScalarInVectorActions.end() && - ScalarAction->second != Legal) - return findLegalAction(Aspect, ScalarAction->second); - - // The element type is legal in principle, but the number of elements is - // wrong. - auto MaxLegalElts = MaxLegalVectorElts.lookup(std::make_pair(Opcode, EltTy)); - if (MaxLegalElts > NumElts) - return findLegalAction(Aspect, MoreElements); - - if (MaxLegalElts == 0) { - // Scalarize if there's no legal vector type, which is just a special case - // of FewerElements. - return std::make_pair(FewerElements, EltTy); - } - - return findLegalAction(Aspect, FewerElements); +/// Helper function to get LLT for the given type index. +static LLT getTypeFromTypeIdx(const MachineInstr &MI, + const MachineRegisterInfo &MRI, unsigned OpIdx, + unsigned TypeIdx) { + assert(TypeIdx < MI.getNumOperands() && "Unexpected TypeIdx"); + // G_UNMERGE_VALUES has variable number of operands, but there is only + // one source type and one destination type as all destinations must be the + // same type. So, get the last operand if TypeIdx == 1. 
+ if (MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && TypeIdx == 1) + return MRI.getType(MI.getOperand(MI.getNumOperands() - 1).getReg()); + return MRI.getType(MI.getOperand(OpIdx).getReg()); } std::tuple<LegalizerInfo::LegalizeAction, unsigned, LLT> @@ -145,19 +191,20 @@ LegalizerInfo::getAction(const MachineInstr &MI, const MachineRegisterInfo &MRI) const { SmallBitVector SeenTypes(8); const MCOperandInfo *OpInfo = MI.getDesc().OpInfo; + // FIXME: probably we'll need to cache the results here somehow? for (unsigned i = 0; i < MI.getDesc().getNumOperands(); ++i) { if (!OpInfo[i].isGenericType()) continue; - // We don't want to repeatedly check the same operand index, that - // could get expensive. + // We must only record actions once for each TypeIdx; otherwise we'd + // try to legalize operands multiple times down the line. unsigned TypeIdx = OpInfo[i].getGenericTypeIndex(); if (SeenTypes[TypeIdx]) continue; SeenTypes.set(TypeIdx); - LLT Ty = MRI.getType(MI.getOperand(i).getReg()); + LLT Ty = getTypeFromTypeIdx(MI, MRI, i, TypeIdx); auto Action = getAction({MI.getOpcode(), TypeIdx, Ty}); if (Action.first != Legal) return std::make_tuple(Action.first, TypeIdx, Action.second); @@ -170,38 +217,166 @@ bool LegalizerInfo::isLegal(const MachineInstr &MI, return std::get<0>(getAction(MI, MRI)) == Legal; } -Optional<LLT> LegalizerInfo::findLegalType(const InstrAspect &Aspect, - LegalizeAction Action) const { - switch(Action) { - default: - llvm_unreachable("Cannot find legal type"); +bool LegalizerInfo::legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &MIRBuilder) const { + return false; +} + +LegalizerInfo::SizeAndActionsVec +LegalizerInfo::increaseToLargerTypesAndDecreaseToLargest( + const SizeAndActionsVec &v, LegalizeAction IncreaseAction, + LegalizeAction DecreaseAction) { + SizeAndActionsVec result; + unsigned LargestSizeSoFar = 0; + if (v.size() >= 1 && v[0].first != 1) + result.push_back({1, IncreaseAction}); + for (size_t i = 0; 
i < v.size(); ++i) { + result.push_back(v[i]); + LargestSizeSoFar = v[i].first; + if (i + 1 < v.size() && v[i + 1].first != v[i].first + 1) { + result.push_back({LargestSizeSoFar + 1, IncreaseAction}); + LargestSizeSoFar = v[i].first + 1; + } + } + result.push_back({LargestSizeSoFar + 1, DecreaseAction}); + return result; +} + +LegalizerInfo::SizeAndActionsVec +LegalizerInfo::decreaseToSmallerTypesAndIncreaseToSmallest( + const SizeAndActionsVec &v, LegalizeAction DecreaseAction, + LegalizeAction IncreaseAction) { + SizeAndActionsVec result; + if (v.size() == 0 || v[0].first != 1) + result.push_back({1, IncreaseAction}); + for (size_t i = 0; i < v.size(); ++i) { + result.push_back(v[i]); + if (i + 1 == v.size() || v[i + 1].first != v[i].first + 1) { + result.push_back({v[i].first + 1, DecreaseAction}); + } + } + return result; +} + +LegalizerInfo::SizeAndAction +LegalizerInfo::findAction(const SizeAndActionsVec &Vec, const uint32_t Size) { + assert(Size >= 1); + // Find the last element in Vec that has a bitsize equal to or smaller than + // the requested bit size. + // That is the element just before the first element that is bigger than Size. + auto VecIt = std::upper_bound( + Vec.begin(), Vec.end(), Size, + [](const uint32_t Size, const SizeAndAction lhs) -> bool { + return Size < lhs.first; + }); + assert(VecIt != Vec.begin() && "Does Vec not start with size 1?"); + --VecIt; + int VecIdx = VecIt - Vec.begin(); + + LegalizeAction Action = Vec[VecIdx].second; + switch (Action) { case Legal: case Lower: case Libcall: case Custom: - return Aspect.Type; + return {Size, Action}; + case FewerElements: + // FIXME: is this special case still needed and correct? 
+ // Special case for scalarization: + if (Vec == SizeAndActionsVec({{1, FewerElements}})) + return {1, FewerElements}; + LLVM_FALLTHROUGH; case NarrowScalar: { - return findLegalizableSize( - Aspect, [&](LLT Ty) -> LLT { return Ty.halfScalarSize(); }); - } - case WidenScalar: { - return findLegalizableSize(Aspect, [&](LLT Ty) -> LLT { - return Ty.getSizeInBits() < 8 ? LLT::scalar(8) : Ty.doubleScalarSize(); - }); - } - case FewerElements: { - return findLegalizableSize( - Aspect, [&](LLT Ty) -> LLT { return Ty.halfElements(); }); + // The following needs to be a loop, as for now, we do allow needing to + // go over "Unsupported" bit sizes before finding a legalizable bit size. + // e.g. (s8, WidenScalar), (s9, Unsupported), (s32, Legal). if Size==8, + // we need to iterate over s9, and then to s32 to return (s32, Legal). + // If we want to get rid of the below loop, we should have stronger asserts + // when building the SizeAndActionsVecs, probably not allowing + // "Unsupported" unless at the ends of the vector. + for (int i = VecIdx - 1; i >= 0; --i) + if (!needsLegalizingToDifferentSize(Vec[i].second) && + Vec[i].second != Unsupported) + return {Vec[i].first, Action}; + llvm_unreachable(""); } + case WidenScalar: case MoreElements: { - return findLegalizableSize( - Aspect, [&](LLT Ty) -> LLT { return Ty.doubleElements(); }); + // See above, the following needs to be a loop, at least for now. 
+ for (std::size_t i = VecIdx + 1; i < Vec.size(); ++i) + if (!needsLegalizingToDifferentSize(Vec[i].second) && + Vec[i].second != Unsupported) + return {Vec[i].first, Action}; + llvm_unreachable(""); } + case Unsupported: + return {Size, Unsupported}; + case NotFound: + llvm_unreachable("NotFound"); } + llvm_unreachable("Action has an unknown enum value"); } -bool LegalizerInfo::legalizeCustom(MachineInstr &MI, - MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder) const { - return false; +std::pair<LegalizerInfo::LegalizeAction, LLT> +LegalizerInfo::findScalarLegalAction(const InstrAspect &Aspect) const { + assert(Aspect.Type.isScalar() || Aspect.Type.isPointer()); + if (Aspect.Opcode < FirstOp || Aspect.Opcode > LastOp) + return {NotFound, LLT()}; + const unsigned OpcodeIdx = Aspect.Opcode - FirstOp; + if (Aspect.Type.isPointer() && + AddrSpace2PointerActions[OpcodeIdx].find(Aspect.Type.getAddressSpace()) == + AddrSpace2PointerActions[OpcodeIdx].end()) { + return {NotFound, LLT()}; + } + const SmallVector<SizeAndActionsVec, 1> &Actions = + Aspect.Type.isPointer() + ? AddrSpace2PointerActions[OpcodeIdx] + .find(Aspect.Type.getAddressSpace()) + ->second + : ScalarActions[OpcodeIdx]; + if (Aspect.Idx >= Actions.size()) + return {NotFound, LLT()}; + const SizeAndActionsVec &Vec = Actions[Aspect.Idx]; + // FIXME: speed up this search, e.g. by using a results cache for repeated + // queries? + auto SizeAndAction = findAction(Vec, Aspect.Type.getSizeInBits()); + return {SizeAndAction.second, + Aspect.Type.isScalar() ? LLT::scalar(SizeAndAction.first) + : LLT::pointer(Aspect.Type.getAddressSpace(), + SizeAndAction.first)}; +} + +std::pair<LegalizerInfo::LegalizeAction, LLT> +LegalizerInfo::findVectorLegalAction(const InstrAspect &Aspect) const { + assert(Aspect.Type.isVector()); + // First legalize the vector element size, then legalize the number of + // lanes in the vector. 
+ if (Aspect.Opcode < FirstOp || Aspect.Opcode > LastOp) + return {NotFound, Aspect.Type}; + const unsigned OpcodeIdx = Aspect.Opcode - FirstOp; + const unsigned TypeIdx = Aspect.Idx; + if (TypeIdx >= ScalarInVectorActions[OpcodeIdx].size()) + return {NotFound, Aspect.Type}; + const SizeAndActionsVec &ElemSizeVec = + ScalarInVectorActions[OpcodeIdx][TypeIdx]; + + LLT IntermediateType; + auto ElementSizeAndAction = + findAction(ElemSizeVec, Aspect.Type.getScalarSizeInBits()); + IntermediateType = + LLT::vector(Aspect.Type.getNumElements(), ElementSizeAndAction.first); + if (ElementSizeAndAction.second != Legal) + return {ElementSizeAndAction.second, IntermediateType}; + + auto i = NumElements2Actions[OpcodeIdx].find( + IntermediateType.getScalarSizeInBits()); + if (i == NumElements2Actions[OpcodeIdx].end()) { + return {NotFound, IntermediateType}; + } + const SizeAndActionsVec &NumElementsVec = (*i).second[TypeIdx]; + auto NumElementsAndAction = + findAction(NumElementsVec, IntermediateType.getNumElements()); + return {NumElementsAndAction.second, + LLT::vector(NumElementsAndAction.first, + IntermediateType.getScalarSizeInBits())}; } diff --git a/lib/CodeGen/GlobalISel/Localizer.cpp b/lib/CodeGen/GlobalISel/Localizer.cpp index c5d0999fe438..8e16470b6f90 100644 --- a/lib/CodeGen/GlobalISel/Localizer.cpp +++ b/lib/CodeGen/GlobalISel/Localizer.cpp @@ -101,7 +101,8 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) { // Don't try to be smart for the insertion point. // There is no guarantee that the first seen use is the first // use in the block. - InsertMBB->insert(InsertMBB->getFirstNonPHI(), LocalizedMI); + InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(InsertMBB->begin()), + LocalizedMI); // Set a new register for the definition. 
unsigned NewReg = @@ -112,7 +113,7 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) { MBBWithLocalDef.insert(std::make_pair(MBBAndReg, NewReg)).first; DEBUG(dbgs() << "Inserted: " << *LocalizedMI); } - DEBUG(dbgs() << "Update use with: " << PrintReg(NewVRegIt->second) + DEBUG(dbgs() << "Update use with: " << printReg(NewVRegIt->second) << '\n'); // Update the user reg. MOUse.setReg(NewVRegIt->second); diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 4636806c3f08..475bb82e5b9c 100644 --- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -15,10 +15,10 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DebugInfo.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetOpcodes.h" -#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -83,30 +83,26 @@ MachineInstrBuilder MachineIRBuilder::insertInstr(MachineInstrBuilder MIB) { return MIB; } -MachineInstrBuilder MachineIRBuilder::buildDirectDbgValue( - unsigned Reg, const MDNode *Variable, const MDNode *Expr) { +MachineInstrBuilder +MachineIRBuilder::buildDirectDbgValue(unsigned Reg, const MDNode *Variable, + const MDNode *Expr) { assert(isa<DILocalVariable>(Variable) && "not a variable"); assert(cast<DIExpression>(Expr)->isValid() && "not an expression"); assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); - return buildInstr(TargetOpcode::DBG_VALUE) - .addReg(Reg, RegState::Debug) - .addReg(0, RegState::Debug) - .addMetadata(Variable) - .addMetadata(Expr); + return insertInstr(BuildMI(getMF(), DL, getTII().get(TargetOpcode::DBG_VALUE), + /*IsIndirect*/ false, Reg, Variable, 
Expr)); } -MachineInstrBuilder MachineIRBuilder::buildIndirectDbgValue( - unsigned Reg, unsigned Offset, const MDNode *Variable, const MDNode *Expr) { +MachineInstrBuilder +MachineIRBuilder::buildIndirectDbgValue(unsigned Reg, const MDNode *Variable, + const MDNode *Expr) { assert(isa<DILocalVariable>(Variable) && "not a variable"); assert(cast<DIExpression>(Expr)->isValid() && "not an expression"); assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); - return buildInstr(TargetOpcode::DBG_VALUE) - .addReg(Reg, RegState::Debug) - .addImm(Offset) - .addMetadata(Variable) - .addMetadata(Expr); + return insertInstr(BuildMI(getMF(), DL, getTII().get(TargetOpcode::DBG_VALUE), + /*IsIndirect*/ true, Reg, Variable, Expr)); } MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI, @@ -124,7 +120,6 @@ MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI, } MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C, - unsigned Offset, const MDNode *Variable, const MDNode *Expr) { assert(isa<DILocalVariable>(Variable) && "not a variable"); @@ -144,7 +139,7 @@ MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C, MIB.addReg(0U); } - return MIB.addImm(Offset).addMetadata(Variable).addMetadata(Expr); + return MIB.addImm(0).addMetadata(Variable).addMetadata(Expr); } MachineInstrBuilder MachineIRBuilder::buildFrameIndex(unsigned Res, int Idx) { @@ -268,7 +263,7 @@ MachineInstrBuilder MachineIRBuilder::buildConstant(unsigned Res, const ConstantInt *NewVal = &Val; if (Ty.getSizeInBits() != Val.getBitWidth()) - NewVal = ConstantInt::get(MF->getFunction()->getContext(), + NewVal = ConstantInt::get(MF->getFunction().getContext(), Val.getValue().sextOrTrunc(Ty.getSizeInBits())); return buildInstr(TargetOpcode::G_CONSTANT).addDef(Res).addCImm(NewVal); @@ -276,7 +271,7 @@ MachineInstrBuilder MachineIRBuilder::buildConstant(unsigned Res, MachineInstrBuilder 
MachineIRBuilder::buildConstant(unsigned Res, int64_t Val) { - auto IntN = IntegerType::get(MF->getFunction()->getContext(), + auto IntN = IntegerType::get(MF->getFunction().getContext(), MRI->getType(Res).getSizeInBits()); ConstantInt *CI = ConstantInt::get(IntN, Val, true); return buildConstant(Res, *CI); @@ -351,14 +346,17 @@ MachineInstrBuilder MachineIRBuilder::buildZExt(unsigned Res, unsigned Op) { return buildInstr(TargetOpcode::G_ZEXT).addDef(Res).addUse(Op); } -MachineInstrBuilder MachineIRBuilder::buildSExtOrTrunc(unsigned Res, - unsigned Op) { +MachineInstrBuilder +MachineIRBuilder::buildExtOrTrunc(unsigned ExtOpc, unsigned Res, unsigned Op) { + assert((TargetOpcode::G_ANYEXT == ExtOpc || TargetOpcode::G_ZEXT == ExtOpc || + TargetOpcode::G_SEXT == ExtOpc) && + "Expecting Extending Opc"); assert(MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()); assert(MRI->getType(Res).isScalar() == MRI->getType(Op).isScalar()); unsigned Opcode = TargetOpcode::COPY; if (MRI->getType(Res).getSizeInBits() > MRI->getType(Op).getSizeInBits()) - Opcode = TargetOpcode::G_SEXT; + Opcode = ExtOpc; else if (MRI->getType(Res).getSizeInBits() < MRI->getType(Op).getSizeInBits()) Opcode = TargetOpcode::G_TRUNC; else @@ -367,20 +365,19 @@ MachineInstrBuilder MachineIRBuilder::buildSExtOrTrunc(unsigned Res, return buildInstr(Opcode).addDef(Res).addUse(Op); } -MachineInstrBuilder MachineIRBuilder::buildZExtOrTrunc(unsigned Res, +MachineInstrBuilder MachineIRBuilder::buildSExtOrTrunc(unsigned Res, unsigned Op) { - assert(MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()); - assert(MRI->getType(Res).isScalar() == MRI->getType(Op).isScalar()); + return buildExtOrTrunc(TargetOpcode::G_SEXT, Res, Op); +} - unsigned Opcode = TargetOpcode::COPY; - if (MRI->getType(Res).getSizeInBits() > MRI->getType(Op).getSizeInBits()) - Opcode = TargetOpcode::G_ZEXT; - else if (MRI->getType(Res).getSizeInBits() < MRI->getType(Op).getSizeInBits()) - Opcode = TargetOpcode::G_TRUNC; - 
else - assert(MRI->getType(Res) == MRI->getType(Op)); +MachineInstrBuilder MachineIRBuilder::buildZExtOrTrunc(unsigned Res, + unsigned Op) { + return buildExtOrTrunc(TargetOpcode::G_ZEXT, Res, Op); +} - return buildInstr(Opcode).addDef(Res).addUse(Op); +MachineInstrBuilder MachineIRBuilder::buildAnyExtOrTrunc(unsigned Res, + unsigned Op) { + return buildExtOrTrunc(TargetOpcode::G_ANYEXT, Res, Op); } MachineInstrBuilder MachineIRBuilder::buildCast(unsigned Dst, unsigned Src) { @@ -661,6 +658,31 @@ MachineInstrBuilder MachineIRBuilder::buildExtractVectorElement(unsigned Res, .addUse(Idx); } +MachineInstrBuilder +MachineIRBuilder::buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr, + unsigned CmpVal, unsigned NewVal, + MachineMemOperand &MMO) { +#ifndef NDEBUG + LLT OldValResTy = MRI->getType(OldValRes); + LLT AddrTy = MRI->getType(Addr); + LLT CmpValTy = MRI->getType(CmpVal); + LLT NewValTy = MRI->getType(NewVal); + assert(OldValResTy.isScalar() && "invalid operand type"); + assert(AddrTy.isPointer() && "invalid operand type"); + assert(CmpValTy.isValid() && "invalid operand type"); + assert(NewValTy.isValid() && "invalid operand type"); + assert(OldValResTy == CmpValTy && "type mismatch"); + assert(OldValResTy == NewValTy && "type mismatch"); +#endif + + return buildInstr(TargetOpcode::G_ATOMIC_CMPXCHG) + .addDef(OldValRes) + .addUse(Addr) + .addUse(CmpVal) + .addUse(NewVal) + .addMemOperand(&MMO); +} + void MachineIRBuilder::validateTruncExt(unsigned Dst, unsigned Src, bool IsExtend) { #ifndef NDEBUG diff --git a/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/lib/CodeGen/GlobalISel/RegBankSelect.cpp index 677941dbbf6d..006c9ea23034 100644 --- a/lib/CodeGen/GlobalISel/RegBankSelect.cpp +++ b/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -26,9 +26,12 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" #include 
"llvm/CodeGen/TargetPassConfig.h" -#include "llvm/IR/Function.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Attributes.h" +#include "llvm/IR/Function.h" #include "llvm/Pass.h" #include "llvm/Support/BlockFrequency.h" #include "llvm/Support/CommandLine.h" @@ -36,9 +39,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetOpcodes.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -159,7 +159,7 @@ bool RegBankSelect::repairReg( // same types because the type is a placeholder when this function is called. MachineInstr *MI = MIRBuilder.buildInstrNoInsert(TargetOpcode::COPY).addDef(Dst).addUse(Src); - DEBUG(dbgs() << "Copy: " << PrintReg(Src) << " to: " << PrintReg(Dst) + DEBUG(dbgs() << "Copy: " << printReg(Src) << " to: " << printReg(Dst) << '\n'); // TODO: // Check if MI is legal. if not, we need to legalize all the @@ -221,9 +221,8 @@ uint64_t RegBankSelect::getRepairCost( // into a new virtual register. // We would also need to propagate this information in the // repairing placement. - unsigned Cost = - RBI->copyCost(*DesiredRegBrank, *CurRegBank, - RegisterBankInfo::getSizeInBits(MO.getReg(), *MRI, *TRI)); + unsigned Cost = RBI->copyCost(*DesiredRegBrank, *CurRegBank, + RBI->getSizeInBits(MO.getReg(), *MRI, *TRI)); // TODO: use a dedicated constant for ImpossibleCost. 
if (Cost != std::numeric_limits<unsigned>::max()) return Cost; @@ -602,9 +601,9 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) { return false; DEBUG(dbgs() << "Assign register banks for: " << MF.getName() << '\n'); - const Function *F = MF.getFunction(); + const Function &F = MF.getFunction(); Mode SaveOptMode = OptMode; - if (F->hasFnAttribute(Attribute::OptimizeNone)) + if (F.hasFnAttribute(Attribute::OptimizeNone)) OptMode = Mode::Fast; init(MF); diff --git a/lib/CodeGen/GlobalISel/RegisterBank.cpp b/lib/CodeGen/GlobalISel/RegisterBank.cpp index 83b21e637097..4d3ae69d3a9d 100644 --- a/lib/CodeGen/GlobalISel/RegisterBank.cpp +++ b/lib/CodeGen/GlobalISel/RegisterBank.cpp @@ -11,7 +11,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/RegisterBank.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #define DEBUG_TYPE "registerbank" diff --git a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp index a841902feed1..b3d9209ae6eb 100644 --- a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp +++ b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp @@ -19,13 +19,12 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetOpcodes.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> // For std::max. 
@@ -84,7 +83,7 @@ const RegisterBank * RegisterBankInfo::getRegBank(unsigned Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const { if (TargetRegisterInfo::isPhysicalRegister(Reg)) - return &getRegBankFromRegClass(*TRI.getMinimalPhysRegClass(Reg)); + return &getRegBankFromRegClass(getMinimalPhysRegClass(Reg, TRI)); assert(Reg && "NoRegister does not have a register bank"); const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); @@ -95,6 +94,19 @@ RegisterBankInfo::getRegBank(unsigned Reg, const MachineRegisterInfo &MRI, return nullptr; } +const TargetRegisterClass & +RegisterBankInfo::getMinimalPhysRegClass(unsigned Reg, + const TargetRegisterInfo &TRI) const { + assert(TargetRegisterInfo::isPhysicalRegister(Reg) && + "Reg must be a physreg"); + const auto &RegRCIt = PhysRegMinimalRCs.find(Reg); + if (RegRCIt != PhysRegMinimalRCs.end()) + return *RegRCIt->second; + const TargetRegisterClass *PhysRC = TRI.getMinimalPhysRegClass(Reg); + PhysRegMinimalRCs[Reg] = PhysRC; + return *PhysRC; +} + const RegisterBank *RegisterBankInfo::getRegBankFromConstraints( const MachineInstr &MI, unsigned OpIdx, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) const { @@ -151,7 +163,7 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const { // is important. The rest is not constrained. unsigned NumOperandsForMapping = IsCopyLike ? 
1 : MI.getNumOperands(); - const MachineFunction &MF = *MI.getParent()->getParent(); + const MachineFunction &MF = *MI.getMF(); const TargetSubtargetInfo &STI = MF.getSubtarget(); const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); const MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -419,16 +431,20 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) { } unsigned OrigReg = MO.getReg(); unsigned NewReg = *NewRegs.begin(); - DEBUG(dbgs() << " changed, replace " << PrintReg(OrigReg, nullptr)); + DEBUG(dbgs() << " changed, replace " << printReg(OrigReg, nullptr)); MO.setReg(NewReg); - DEBUG(dbgs() << " with " << PrintReg(NewReg, nullptr)); + DEBUG(dbgs() << " with " << printReg(NewReg, nullptr)); // The OperandsMapper creates plain scalar, we may have to fix that. // Check if the types match and if not, fix that. LLT OrigTy = MRI.getType(OrigReg); LLT NewTy = MRI.getType(NewReg); if (OrigTy != NewTy) { - assert(OrigTy.getSizeInBits() == NewTy.getSizeInBits() && + // The default mapping is not supposed to change the size of + // the storage. However, right now we don't necessarily bump all + // the types to storage size. For instance, we can consider + // s16 G_AND legal whereas the storage size is going to be 32. + assert(OrigTy.getSizeInBits() <= NewTy.getSizeInBits() && "Types with difference size cannot be handled by the default " "mapping"); DEBUG(dbgs() << "\nChange type of new opd from " << NewTy << " to " @@ -441,13 +457,13 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) { unsigned RegisterBankInfo::getSizeInBits(unsigned Reg, const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI) { + const TargetRegisterInfo &TRI) const { const TargetRegisterClass *RC = nullptr; if (TargetRegisterInfo::isPhysicalRegister(Reg)) { // The size is not directly available for physical registers. // Instead, we need to access a register class that contains Reg and // get the size of that register class. 
- RC = TRI.getMinimalPhysRegClass(Reg); + RC = &getMinimalPhysRegClass(Reg, TRI); } else { LLT Ty = MRI.getType(Reg); unsigned RegSize = Ty.isValid() ? Ty.getSizeInBits() : 0; @@ -543,10 +559,11 @@ bool RegisterBankInfo::InstructionMapping::verify( // For PHI, we only care about mapping the definition. assert(NumOperands == (isCopyLike(MI) ? 1 : MI.getNumOperands()) && "NumOperands must match, see constructor"); - assert(MI.getParent() && MI.getParent()->getParent() && + assert(MI.getParent() && MI.getMF() && "MI must be connected to a MachineFunction"); - const MachineFunction &MF = *MI.getParent()->getParent(); - (void)MF; + const MachineFunction &MF = *MI.getMF(); + const RegisterBankInfo *RBI = MF.getSubtarget().getRegBankInfo(); + (void)RBI; for (unsigned Idx = 0; Idx < NumOperands; ++Idx) { const MachineOperand &MO = MI.getOperand(Idx); @@ -564,7 +581,7 @@ bool RegisterBankInfo::InstructionMapping::verify( (void)MOMapping; // Register size in bits. // This size must match what the mapping expects. - assert(MOMapping.verify(getSizeInBits( + assert(MOMapping.verify(RBI->getSizeInBits( Reg, MF.getRegInfo(), *MF.getSubtarget().getRegisterInfo())) && "Value mapping is invalid"); } @@ -725,8 +742,8 @@ void RegisterBankInfo::OperandsMapper::print(raw_ostream &OS, // If we have a function, we can pretty print the name of the registers. // Otherwise we will print the raw numbers. const TargetRegisterInfo *TRI = - getMI().getParent() && getMI().getParent()->getParent() - ? getMI().getParent()->getParent()->getSubtarget().getRegisterInfo() + getMI().getParent() && getMI().getMF() + ? 
getMI().getMF()->getSubtarget().getRegisterInfo() : nullptr; bool IsFirst = true; for (unsigned Idx = 0; Idx != NumOpds; ++Idx) { @@ -735,13 +752,13 @@ void RegisterBankInfo::OperandsMapper::print(raw_ostream &OS, if (!IsFirst) OS << ", "; IsFirst = false; - OS << '(' << PrintReg(getMI().getOperand(Idx).getReg(), TRI) << ", ["; + OS << '(' << printReg(getMI().getOperand(Idx).getReg(), TRI) << ", ["; bool IsFirstNewVReg = true; for (unsigned VReg : getVRegs(Idx)) { if (!IsFirstNewVReg) OS << ", "; IsFirstNewVReg = false; - OS << PrintReg(VReg, TRI); + OS << printReg(VReg, TRI); } OS << "])"; } diff --git a/lib/CodeGen/GlobalISel/Utils.cpp b/lib/CodeGen/GlobalISel/Utils.cpp index 5ecaf5c563f8..ef990b49aceb 100644 --- a/lib/CodeGen/GlobalISel/Utils.cpp +++ b/lib/CodeGen/GlobalISel/Utils.cpp @@ -17,10 +17,10 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/Constants.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" #define DEBUG_TYPE "globalisel-utils" @@ -99,7 +99,10 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC, const MachineInstr &MI) { MachineOptimizationRemarkMissed R(PassName, "GISelFailure: ", MI.getDebugLoc(), MI.getParent()); - R << Msg << ": " << ore::MNV("Inst", MI); + R << Msg; + // Printing MI is expensive; only do it if expensive remarks are enabled. 
+ if (MORE.allowExtraAnalysis(PassName)) + R << ": " << ore::MNV("Inst", MI); reportGISelFailure(MF, TPC, MORE, R); } @@ -126,3 +129,19 @@ const llvm::ConstantFP* llvm::getConstantFPVRegVal(unsigned VReg, return nullptr; return MI->getOperand(1).getFPImm(); } + +llvm::MachineInstr *llvm::getOpcodeDef(unsigned Opcode, unsigned Reg, + const MachineRegisterInfo &MRI) { + auto *DefMI = MRI.getVRegDef(Reg); + auto DstTy = MRI.getType(DefMI->getOperand(0).getReg()); + if (!DstTy.isValid()) + return nullptr; + while (DefMI->getOpcode() == TargetOpcode::COPY) { + unsigned SrcReg = DefMI->getOperand(1).getReg(); + auto SrcTy = MRI.getType(SrcReg); + if (!SrcTy.isValid() || SrcTy != DstTy) + break; + DefMI = MRI.getVRegDef(SrcReg); + } + return DefMI->getOpcode() == Opcode ? DefMI : nullptr; +} diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp index c6ca49ce24d7..8b9545da914e 100644 --- a/lib/CodeGen/GlobalMerge.cpp +++ b/lib/CodeGen/GlobalMerge.cpp @@ -1,4 +1,4 @@ -//===-- GlobalMerge.cpp - Internal globals merging -----------------------===// +//===- GlobalMerge.cpp - Internal globals merging -------------------------===// // // The LLVM Compiler Infrastructure // @@ -6,6 +6,7 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// // This pass merges globals with internal linkage into one. This way all the // globals which were merged into a biggest one can be addressed using offsets // from the same base pointer (no need for separate base pointer for each of the @@ -57,30 +58,45 @@ // - it can increase register pressure when the uses are disparate enough. // // We use heuristics to discover the best global grouping we can (cf cl::opts). 
+// // ===---------------------------------------------------------------------===// +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/IR/Attributes.h" +#include "llvm/CodeGen/TargetLoweringObjectFile.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" #include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Target/TargetMachine.h" #include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <string> +#include <vector> + using namespace llvm; #define DEBUG_TYPE "global-merge" @@ -117,9 +133,12 @@ EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden, cl::desc("Enable global merge pass on external linkage")); STATISTIC(NumMerged, "Number of globals merged"); + namespace { + class GlobalMerge : public FunctionPass { - const TargetMachine *TM; + const TargetMachine *TM = nullptr; + // FIXME: Infer the maximum possible offset depending on the actual users // (these max offsets are different for the users inside Thumb or ARM // functions), see the code that passes 
in the offset in the ARM backend @@ -130,15 +149,16 @@ namespace { /// Currently, this applies a dead simple heuristic: only consider globals /// used in minsize functions for merging. /// FIXME: This could learn about optsize, and be used in the cost model. - bool OnlyOptimizeForSize; + bool OnlyOptimizeForSize = false; /// Whether we should merge global variables that have external linkage. - bool MergeExternalGlobals; + bool MergeExternalGlobals = false; bool IsMachO; bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals, Module &M, bool isConst, unsigned AddrSpace) const; + /// \brief Merge everything in \p Globals for which the corresponding bit /// in \p GlobalSet is set. bool doMerge(const SmallVectorImpl<GlobalVariable *> &Globals, @@ -164,9 +184,9 @@ namespace { public: static char ID; // Pass identification, replacement for typeid. + explicit GlobalMerge() - : FunctionPass(ID), TM(nullptr), MaxOffset(GlobalMergeMaxOffset), - OnlyOptimizeForSize(false), MergeExternalGlobals(false) { + : FunctionPass(ID), MaxOffset(GlobalMergeMaxOffset) { initializeGlobalMergePass(*PassRegistry::getPassRegistry()); } @@ -189,9 +209,11 @@ namespace { FunctionPass::getAnalysisUsage(AU); } }; + } // end anonymous namespace char GlobalMerge::ID = 0; + INITIALIZE_PASS(GlobalMerge, DEBUG_TYPE, "Merge global variables", false, false) bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, @@ -231,9 +253,10 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, // We keep track of the sets of globals used together "close enough". struct UsedGlobalSet { - UsedGlobalSet(size_t Size) : Globals(Size), UsageCount(1) {} BitVector Globals; - unsigned UsageCount; + unsigned UsageCount = 1; + + UsedGlobalSet(size_t Size) : Globals(Size) {} }; // Each set is unique in UsedGlobalSets. @@ -363,7 +386,7 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, // // Multiply that by the size of the set to give us a crude profitability // metric. 
- std::sort(UsedGlobalSets.begin(), UsedGlobalSets.end(), + std::stable_sort(UsedGlobalSets.begin(), UsedGlobalSets.end(), [](const UsedGlobalSet &UGS1, const UsedGlobalSet &UGS2) { return UGS1.Globals.count() * UGS1.UsageCount < UGS2.Globals.count() * UGS2.UsageCount; @@ -545,7 +568,7 @@ bool GlobalMerge::doInitialization(Module &M) { IsMachO = Triple(M.getTargetTriple()).isOSBinFormatMachO(); auto &DL = M.getDataLayout(); - DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals, + DenseMap<unsigned, SmallVector<GlobalVariable *, 16>> Globals, ConstGlobals, BSSGlobals; bool Changed = false; setMustKeepGlobalVariables(M); diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index ff8405366173..a22ce0dab9c2 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -1,4 +1,4 @@ -//===-- IfConversion.cpp - Machine code if conversion pass. ---------------===// +//===- IfConversion.cpp - Machine code if conversion pass -----------------===// // // The LLVM Compiler Infrastructure // @@ -16,26 +16,41 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SparseSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include 
"llvm/CodeGen/TargetSchedule.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Pass.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> +#include <cassert> +#include <functional> +#include <iterator> +#include <memory> #include <utility> +#include <vector> using namespace llvm; @@ -77,6 +92,7 @@ STATISTIC(NumDupBBs, "Number of duplicated blocks"); STATISTIC(NumUnpred, "Number of true blocks of diamonds unpredicated"); namespace { + class IfConverter : public MachineFunctionPass { enum IfcvtKind { ICNotClassfied, // BB data valid, but not classified. @@ -125,21 +141,20 @@ namespace { bool IsUnpredicable : 1; bool CannotBeCopied : 1; bool ClobbersPred : 1; - unsigned NonPredSize; - unsigned ExtraCost; - unsigned ExtraCost2; - MachineBasicBlock *BB; - MachineBasicBlock *TrueBB; - MachineBasicBlock *FalseBB; + unsigned NonPredSize = 0; + unsigned ExtraCost = 0; + unsigned ExtraCost2 = 0; + MachineBasicBlock *BB = nullptr; + MachineBasicBlock *TrueBB = nullptr; + MachineBasicBlock *FalseBB = nullptr; SmallVector<MachineOperand, 4> BrCond; SmallVector<MachineOperand, 4> Predicate; + BBInfo() : IsDone(false), IsBeingAnalyzed(false), IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false), IsBrReversible(false), HasFallThrough(false), IsUnpredicable(false), CannotBeCopied(false), - ClobbersPred(false), NonPredSize(0), ExtraCost(0), - ExtraCost2(0), BB(nullptr), TrueBB(nullptr), - FalseBB(nullptr) {} + ClobbersPred(false) {} }; /// Record information about pending if-conversions to attempt: @@ -161,6 +176,7 @@ namespace { bool NeedSubsumption : 1; bool 
TClobbersPred : 1; bool FClobbersPred : 1; + IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0, bool tc = false, bool fc = false) : BBI(b), Kind(k), NumDups(d), NumDups2(d2), NeedSubsumption(s), @@ -179,17 +195,17 @@ namespace { MachineRegisterInfo *MRI; LivePhysRegs Redefs; - LivePhysRegs DontKill; bool PreRegAlloc; bool MadeChange; - int FnNum; + int FnNum = -1; std::function<bool(const MachineFunction &)> PredicateFtor; public: static char ID; + IfConverter(std::function<bool(const MachineFunction &)> Ftor = nullptr) - : MachineFunctionPass(ID), FnNum(-1), PredicateFtor(std::move(Ftor)) { + : MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) { initializeIfConverterPass(*PassRegistry::getPassRegistry()); } @@ -242,7 +258,6 @@ namespace { void AnalyzeBlocks(MachineFunction &MF, std::vector<std::unique_ptr<IfcvtToken>> &Tokens); void InvalidatePreds(MachineBasicBlock &MBB); - void RemoveExtraEdges(BBInfo &BBI); bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind); bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind); bool IfConvertDiamondCommon(BBInfo &BBI, BBInfo &TrueBBI, BBInfo &FalseBBI, @@ -311,8 +326,9 @@ namespace { } }; - char IfConverter::ID = 0; -} +} // end anonymous namespace + +char IfConverter::ID = 0; char &llvm::IfConverterID = IfConverter::ID; @@ -321,7 +337,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_END(IfConverter, DEBUG_TYPE, "If Converter", false, false) bool IfConverter::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction()) || (PredicateFtor && !PredicateFtor(MF))) + if (skipFunction(MF.getFunction()) || (PredicateFtor && !PredicateFtor(MF))) return false; const TargetSubtargetInfo &ST = MF.getSubtarget(); @@ -390,12 +406,12 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { case ICSimpleFalse: { bool isFalse = Kind == ICSimpleFalse; if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break; - DEBUG(dbgs() << "Ifcvt (Simple" << (Kind 
== ICSimpleFalse ? - " false" : "") - << "): BB#" << BBI.BB->getNumber() << " (" - << ((Kind == ICSimpleFalse) - ? BBI.FalseBB->getNumber() - : BBI.TrueBB->getNumber()) << ") "); + DEBUG(dbgs() << "Ifcvt (Simple" + << (Kind == ICSimpleFalse ? " false" : "") + << "): " << printMBBReference(*BBI.BB) << " (" + << ((Kind == ICSimpleFalse) ? BBI.FalseBB->getNumber() + : BBI.TrueBB->getNumber()) + << ") "); RetVal = IfConvertSimple(BBI, Kind); DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); if (RetVal) { @@ -419,9 +435,9 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << " false"); if (isRev) DEBUG(dbgs() << " rev"); - DEBUG(dbgs() << "): BB#" << BBI.BB->getNumber() << " (T:" - << BBI.TrueBB->getNumber() << ",F:" - << BBI.FalseBB->getNumber() << ") "); + DEBUG(dbgs() << "): " << printMBBReference(*BBI.BB) + << " (T:" << BBI.TrueBB->getNumber() + << ",F:" << BBI.FalseBB->getNumber() << ") "); RetVal = IfConvertTriangle(BBI, Kind); DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); if (RetVal) { @@ -435,24 +451,22 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { } break; } - case ICDiamond: { + case ICDiamond: if (DisableDiamond) break; - DEBUG(dbgs() << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:" - << BBI.TrueBB->getNumber() << ",F:" - << BBI.FalseBB->getNumber() << ") "); + DEBUG(dbgs() << "Ifcvt (Diamond): " << printMBBReference(*BBI.BB) + << " (T:" << BBI.TrueBB->getNumber() + << ",F:" << BBI.FalseBB->getNumber() << ") "); RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2, Token->TClobbersPred, Token->FClobbersPred); DEBUG(dbgs() << (RetVal ? "succeeded!" 
: "failed!") << "\n"); if (RetVal) ++NumDiamonds; break; - } - case ICForkedDiamond: { + case ICForkedDiamond: if (DisableForkedDiamond) break; - DEBUG(dbgs() << "Ifcvt (Forked Diamond): BB#" - << BBI.BB->getNumber() << " (T:" - << BBI.TrueBB->getNumber() << ",F:" - << BBI.FalseBB->getNumber() << ") "); + DEBUG(dbgs() << "Ifcvt (Forked Diamond): " << printMBBReference(*BBI.BB) + << " (T:" << BBI.TrueBB->getNumber() + << ",F:" << BBI.FalseBB->getNumber() << ") "); RetVal = IfConvertForkedDiamond(BBI, Kind, NumDups, NumDups2, Token->TClobbersPred, Token->FClobbersPred); @@ -460,7 +474,9 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { if (RetVal) ++NumForkedDiamonds; break; } - } + + if (RetVal && MRI->tracksLiveness()) + recomputeLivenessFlags(*BBI.BB); Change |= RetVal; @@ -616,7 +632,6 @@ bool IfConverter::CountDuplicatedInstructions( unsigned &Dups1, unsigned &Dups2, MachineBasicBlock &TBB, MachineBasicBlock &FBB, bool SkipUnconditionalBranches) const { - while (TIB != TIE && FIB != FIE) { // Skip dbg_value instructions. These do not count. TIB = skipDebugInstructionsForward(TIB, TIE); @@ -1342,19 +1357,10 @@ static void InsertUncondBranch(MachineBasicBlock &MBB, MachineBasicBlock &ToMBB, TII->insertBranch(MBB, &ToMBB, nullptr, NoCond, dl); } -/// Remove true / false edges if either / both are no longer successors. -void IfConverter::RemoveExtraEdges(BBInfo &BBI) { - MachineBasicBlock *TBB = nullptr, *FBB = nullptr; - SmallVector<MachineOperand, 4> Cond; - if (!TII->analyzeBranch(*BBI.BB, TBB, FBB, Cond)) - BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); -} - /// Behaves like LiveRegUnits::StepForward() but also adds implicit uses to all /// values defined in MI which are also live/used by MI. 
static void UpdatePredRedefs(MachineInstr &MI, LivePhysRegs &Redefs) { - const TargetRegisterInfo *TRI = MI.getParent()->getParent() - ->getSubtarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MI.getMF()->getSubtarget().getRegisterInfo(); // Before stepping forward past MI, remember which regs were live // before MI. This is needed to set the Undef flag only when reg is @@ -1374,7 +1380,7 @@ static void UpdatePredRedefs(MachineInstr &MI, LivePhysRegs &Redefs) { unsigned Reg = Clobber.first; MachineOperand &Op = const_cast<MachineOperand&>(*Clobber.second); MachineInstr *OpMI = Op.getParent(); - MachineInstrBuilder MIB(*OpMI->getParent()->getParent(), OpMI); + MachineInstrBuilder MIB(*OpMI->getMF(), OpMI); if (Op.isRegMask()) { // First handle regmasks. They clobber any entries in the mask which // means that we need a def for those registers. @@ -1389,13 +1395,6 @@ static void UpdatePredRedefs(MachineInstr &MI, LivePhysRegs &Redefs) { MIB.addReg(Reg, RegState::Implicit | RegState::Define); continue; } - assert(Op.isReg() && "Register operand required"); - if (Op.isDead()) { - // If we found a dead def, but it needs to be live, then remove the dead - // flag. - if (Redefs.contains(Op.getReg())) - Op.setIsDead(false); - } if (LiveBeforeMI.count(Reg)) MIB.addReg(Reg, RegState::Implicit); else { @@ -1412,26 +1411,6 @@ static void UpdatePredRedefs(MachineInstr &MI, LivePhysRegs &Redefs) { } } -/// Remove kill flags from operands with a registers in the \p DontKill set. -static void RemoveKills(MachineInstr &MI, const LivePhysRegs &DontKill) { - for (MIBundleOperands O(MI); O.isValid(); ++O) { - if (!O->isReg() || !O->isKill()) - continue; - if (DontKill.contains(O->getReg())) - O->setIsKill(false); - } -} - -/// Walks a range of machine instructions and removes kill flags for registers -/// in the \p DontKill set. 
-static void RemoveKills(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator E, - const LivePhysRegs &DontKill, - const MCRegisterInfo &MCRI) { - for (MachineInstr &MI : make_range(I, E)) - RemoveKills(MI, DontKill); -} - /// If convert a simple (split, no rejoin) sub-CFG. bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()]; @@ -1462,16 +1441,12 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { llvm_unreachable("Unable to reverse branch condition!"); Redefs.init(*TRI); - DontKill.init(*TRI); if (MRI->tracksLiveness()) { // Initialize liveins to the first BB. These are potentiall redefined by // predicated instructions. Redefs.addLiveIns(CvtMBB); Redefs.addLiveIns(NextMBB); - // Compute a set of registers which must not be killed by instructions in - // BB1: This is everything live-in to BB2. - DontKill.addLiveIns(NextMBB); } // Remove the branches from the entry so we can add the contents of the true @@ -1483,15 +1458,14 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { // the entry block. CopyAndPredicateBlock(BBI, *CvtBBI, Cond); - // RemoveExtraEdges won't work if the block has an unanalyzable branch, so - // explicitly remove CvtBBI as a successor. + // Keep the CFG updated. BBI.BB->removeSuccessor(&CvtMBB, true); } else { // Predicate the instructions in the true block. - RemoveKills(CvtMBB.begin(), CvtMBB.end(), DontKill, *TRI); PredicateBlock(*CvtBBI, CvtMBB.end(), Cond); - // Merge converted block into entry block. + // Merge converted block into entry block. The BB to Cvt edge is removed + // by MergeBlocks. MergeBlocks(BBI, *CvtBBI); } @@ -1512,8 +1486,6 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { IterIfcvt = false; } - RemoveExtraEdges(BBI); - // Update block info. BB can be iteratively if-converted. 
if (!IterIfcvt) BBI.IsDone = true; @@ -1578,8 +1550,6 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { Redefs.addLiveIns(NextMBB); } - DontKill.clear(); - bool HasEarlyExit = CvtBBI->FalseBB != nullptr; BranchProbability CvtNext, CvtFalse, BBNext, BBCvt; @@ -1599,10 +1569,6 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { // Copy instructions in the true block, predicate them, and add them to // the entry block. CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true); - - // RemoveExtraEdges won't work if the block has an unanalyzable branch, so - // explicitly remove CvtBBI as a successor. - BBI.BB->removeSuccessor(&CvtMBB, true); } else { // Predicate the 'true' block after removing its branch. CvtBBI->NonPredSize -= TII->removeBranch(CvtMBB); @@ -1612,6 +1578,9 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { MergeBlocks(BBI, *CvtBBI, false); } + // Keep the CFG updated. + BBI.BB->removeSuccessor(&CvtMBB, true); + // If 'true' block has a 'false' successor, add an exit branch to it. if (HasEarlyExit) { SmallVector<MachineOperand, 4> RevCond(CvtBBI->BrCond.begin(), @@ -1659,8 +1628,6 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { IterIfcvt = false; } - RemoveExtraEdges(BBI); - // Update block info. BB can be iteratively if-converted. if (!IterIfcvt) BBI.IsDone = true; @@ -1765,14 +1732,7 @@ bool IfConverter::IfConvertDiamondCommon( --NumDups1; } - // Compute a set of registers which must not be killed by instructions in BB1: - // This is everything used+live in BB2 after the duplicated instructions. We - // can compute this set by simulating liveness backwards from the end of BB2. 
- DontKill.init(*TRI); if (MRI->tracksLiveness()) { - for (const MachineInstr &MI : make_range(MBB2.rbegin(), ++DI2.getReverse())) - DontKill.stepBackward(MI); - for (const MachineInstr &MI : make_range(MBB1.begin(), DI1)) { SmallVector<std::pair<unsigned, const MachineOperand*>, 4> Dummy; Redefs.stepForward(MI, Dummy); @@ -1802,10 +1762,6 @@ bool IfConverter::IfConvertDiamondCommon( } MBB1.erase(DI1, MBB1.end()); - // Kill flags in the true block for registers living into the false block - // must be removed. - RemoveKills(MBB1.begin(), MBB1.end(), DontKill, *TRI); - DI2 = BBI2->BB->end(); // The branches have been checked to match. Skip over the branch in the false // block so that we don't try to predicate it. @@ -1923,8 +1879,6 @@ bool IfConverter::IfConvertForkedDiamond( TII->insertBranch(*BBI.BB, TrueBBI.TrueBB, TrueBBI.FalseBB, TrueBBI.BrCond, dl); - RemoveExtraEdges(BBI); - // Update block info. BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true; InvalidatePreds(*BBI.BB); @@ -1961,6 +1915,11 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // fold the tail block in as well. Otherwise, unless it falls through to the // tail, add a unconditional branch to it. if (TailBB) { + // We need to remove the edges to the true and false blocks manually since + // we didn't let IfConvertDiamondCommon update the CFG. + BBI.BB->removeSuccessor(TrueBBI.BB); + BBI.BB->removeSuccessor(FalseBBI.BB, true); + BBInfo &TailBBI = BBAnalysis[TailBB->getNumber()]; bool CanMergeTail = !TailBBI.HasFallThrough && !TailBBI.BB->hasAddressTaken(); @@ -1990,13 +1949,6 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, } } - // RemoveExtraEdges won't work if the block has an unanalyzable branch, - // which can happen here if TailBB is unanalyzable and is merged, so - // explicitly remove BBI1 and BBI2 as successors. 
- BBI.BB->removeSuccessor(TrueBBI.BB); - BBI.BB->removeSuccessor(FalseBBI.BB, /* NormalizeSuccessProbs */ true); - RemoveExtraEdges(BBI); - // Update block info. BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true; InvalidatePreds(*BBI.BB); @@ -2101,10 +2053,6 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, // If the predicated instruction now redefines a register as the result of // if-conversion, add an implicit kill. UpdatePredRedefs(*MI, Redefs); - - // Some kill flags may not be correct anymore. - if (!DontKill.empty()) - RemoveKills(*MI, DontKill); } if (!IgnoreBr) { @@ -2133,7 +2081,8 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, /// Move all instructions from FromBB to the end of ToBB. This will leave /// FromBB as an empty block, so remove all of its successor edges except for /// the fall-through edge. If AddEdges is true, i.e., when FromBBI's branch is -/// being moved, add those successor edges to ToBBI. +/// being moved, add those successor edges to ToBBI and remove the old edge +/// from ToBBI to FromBBI. void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { MachineBasicBlock &FromMBB = *FromBBI.BB; assert(!FromMBB.hasAddressTaken() && @@ -2165,12 +2114,10 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { // AddEdges is true and FromMBB is a successor of ToBBI.BB. auto To2FromProb = BranchProbability::getZero(); if (AddEdges && ToBBI.BB->isSuccessor(&FromMBB)) { + // Remove the old edge but remember the edge probability so we can calculate + // the correct weights on the new edges being added further down. To2FromProb = MBPI->getEdgeProbability(ToBBI.BB, &FromMBB); - // Set the edge probability from ToBBI.BB to FromMBB to zero to avoid the - // edge probability being merged to other edges when this edge is removed - // later. 
- ToBBI.BB->setSuccProbability(find(ToBBI.BB->successors(), &FromMBB), - BranchProbability::getZero()); + ToBBI.BB->removeSuccessor(&FromMBB); } for (MachineBasicBlock *Succ : FromSuccs) { @@ -2229,9 +2176,11 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { } } - // Now FromBBI always falls through to the next block! - if (NBB && !FromMBB.isSuccessor(NBB)) - FromMBB.addSuccessor(NBB); + // Move the now empty FromMBB out of the way to the end of the function so + // it doesn't interfere with fallthrough checks done by canFallThroughTo(). + MachineBasicBlock *Last = &*FromMBB.getParent()->rbegin(); + if (Last != &FromMBB) + FromMBB.moveAfter(Last); // Normalize the probabilities of ToBBI.BB's successors with all adjustment // we've done above. diff --git a/lib/CodeGen/ImplicitNullChecks.cpp b/lib/CodeGen/ImplicitNullChecks.cpp index e308f49ec4e8..308b6d293d3d 100644 --- a/lib/CodeGen/ImplicitNullChecks.cpp +++ b/lib/CodeGen/ImplicitNullChecks.cpp @@ -1,4 +1,4 @@ -//===-- ImplicitNullChecks.cpp - Fold null checks into memory accesses ----===// +//===- ImplicitNullChecks.cpp - Fold null checks into memory accesses -----===// // // The LLVM Compiler Infrastructure // @@ -26,38 +26,50 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/CodeGen/FaultMaps.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include 
"llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Instruction.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include <cassert> +#include <cstdint> +#include <iterator> using namespace llvm; static cl::opt<int> PageSize("imp-null-check-page-size", cl::desc("The page size of the target in bytes"), - cl::init(4096)); + cl::init(4096), cl::Hidden); static cl::opt<unsigned> MaxInstsToConsider( "imp-null-max-insts-to-consider", cl::desc("The max number of instructions to consider hoisting loads over " "(the algorithm is quadratic over this number)"), - cl::init(8)); + cl::Hidden, cl::init(8)); #define DEBUG_TYPE "implicit-null-checks" @@ -152,7 +164,6 @@ class ImplicitNullChecks : public MachineFunctionPass { const TargetInstrInfo *TII = nullptr; const TargetRegisterInfo *TRI = nullptr; AliasAnalysis *AA = nullptr; - MachineModuleInfo *MMI = nullptr; MachineFrameInfo *MFI = nullptr; bool analyzeBlockForNullChecks(MachineBasicBlock &MBB, @@ -166,6 +177,7 @@ class ImplicitNullChecks : public MachineFunctionPass { AR_MayAlias, AR_WillAliasEverything }; + /// Returns AR_NoAlias if \p MI memory operation does not alias with /// \p PrevMI, AR_MayAlias if they may alias and AR_WillAliasEverything if /// they may alias and any further memory operation may alias with \p PrevMI. 
@@ -176,6 +188,7 @@ class ImplicitNullChecks : public MachineFunctionPass { SR_Unsuitable, SR_Impossible }; + /// Return SR_Suitable if \p MI a memory operation that can be used to /// implicitly null check the value in \p PointerReg, SR_Unsuitable if /// \p MI cannot be used to null check and SR_Impossible if there is @@ -200,6 +213,7 @@ public: } bool runOnMachineFunction(MachineFunction &MF) override; + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<AAResultsWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); @@ -211,7 +225,7 @@ public: } }; -} +} // end anonymous namespace bool ImplicitNullChecks::canHandle(const MachineInstr *MI) { if (MI->isCall() || MI->hasUnmodeledSideEffects()) @@ -230,7 +244,7 @@ ImplicitNullChecks::DependenceResult ImplicitNullChecks::computeDependence(const MachineInstr *MI, ArrayRef<MachineInstr *> Block) { assert(llvm::all_of(Block, canHandle) && "Check this first!"); - assert(!llvm::is_contained(Block, MI) && "Block must be exclusive of MI!"); + assert(!is_contained(Block, MI) && "Block must be exclusive of MI!"); Optional<ArrayRef<MachineInstr *>::iterator> Dep; @@ -280,7 +294,6 @@ bool ImplicitNullChecks::canReorder(const MachineInstr *A, bool ImplicitNullChecks::runOnMachineFunction(MachineFunction &MF) { TII = MF.getSubtarget().getInstrInfo(); TRI = MF.getRegInfo().getTargetRegisterInfo(); - MMI = &MF.getMMI(); MFI = &MF.getFrameInfo(); AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); @@ -356,7 +369,7 @@ ImplicitNullChecks::isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg, // We want the mem access to be issued at a sane offset from PointerReg, // so that if PointerReg is null then the access reliably page faults. if (!((MI.mayLoad() || MI.mayStore()) && !MI.isPredicable() && - Offset < PageSize)) + -PageSize < Offset && Offset < PageSize)) return SR_Unsuitable; // Finally, check whether the current memory access aliases with previous one. 
@@ -390,8 +403,10 @@ bool ImplicitNullChecks::canHoistInst(MachineInstr *FaultingMI, // We don't want to reason about speculating loads. Note -- at this point // we should have already filtered out all of the other non-speculatable // things, like calls and stores. + // We also do not want to hoist stores because it might change the memory + // while the FaultingMI may result in faulting. assert(canHandle(DependenceMI) && "Should never have reached here!"); - if (DependenceMI->mayLoad()) + if (DependenceMI->mayLoadOrStore()) return false; for (auto &DependenceMO : DependenceMI->operands()) { @@ -406,7 +421,7 @@ bool ImplicitNullChecks::canHoistInst(MachineInstr *FaultingMI, // test %rcx, %rcx // je _null_block // _non_null_block: - // %rdx<def> = INST + // %rdx = INST // ... // // This restriction does not apply to the faulting load inst because in @@ -441,7 +456,7 @@ bool ImplicitNullChecks::canHoistInst(MachineInstr *FaultingMI, /// NullCheckList and return true, else return false. bool ImplicitNullChecks::analyzeBlockForNullChecks( MachineBasicBlock &MBB, SmallVectorImpl<NullCheck> &NullCheckList) { - typedef TargetInstrInfo::MachineBranchPredicate MachineBranchPredicate; + using MachineBranchPredicate = TargetInstrInfo::MachineBranchPredicate; MDNode *BranchMD = nullptr; if (auto *BB = MBB.getBasicBlock()) @@ -483,7 +498,7 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( // Starting with a code fragment like: // - // test %RAX, %RAX + // test %rax, %rax // jne LblNotNull // // LblNull: @@ -493,13 +508,13 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( // Inst0 // Inst1 // ... - // Def = Load (%RAX + <offset>) + // Def = Load (%rax + <offset>) // ... 
// // // we want to end up with // - // Def = FaultingLoad (%RAX + <offset>), LblNull + // Def = FaultingLoad (%rax + <offset>), LblNull // jmp LblNotNull ;; explicit or fallthrough // // LblNotNull: @@ -513,11 +528,11 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( // // To see why this is legal, consider the two possibilities: // - // 1. %RAX is null: since we constrain <offset> to be less than PageSize, the + // 1. %rax is null: since we constrain <offset> to be less than PageSize, the // load instruction dereferences the null page, causing a segmentation // fault. // - // 2. %RAX is not null: in this case we know that the load cannot fault, as + // 2. %rax is not null: in this case we know that the load cannot fault, as // otherwise the load would've faulted in the original program too and the // original program would've been undefined. // @@ -555,7 +570,7 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( } // If MI re-defines the PointerReg then we cannot move further. 
- if (any_of(MI.operands(), [&](MachineOperand &MO) { + if (llvm::any_of(MI.operands(), [&](MachineOperand &MO) { return MO.isReg() && MO.getReg() && MO.isDef() && TRI->regsOverlap(MO.getReg(), PointerReg); })) @@ -674,9 +689,10 @@ void ImplicitNullChecks::rewriteNullChecks( } } - char ImplicitNullChecks::ID = 0; + char &llvm::ImplicitNullChecksID = ImplicitNullChecks::ID; + INITIALIZE_PASS_BEGIN(ImplicitNullChecks, DEBUG_TYPE, "Implicit null checks", false, false) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index eda4f74c7874..1aaf7a0ceef8 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -1,4 +1,4 @@ -//===-------- InlineSpiller.cpp - Insert spills and restores inline -------===// +//===- InlineSpiller.cpp - Insert spills and restores inline --------------===// // // The LLVM Compiler Infrastructure // @@ -12,31 +12,52 @@ // //===----------------------------------------------------------------------===// +#include "LiveRangeCalc.h" #include "Spiller.h" #include "SplitKit.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" -#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include 
"llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/VirtRegMap.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/Support/BlockFrequency.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" +#include <cassert> +#include <iterator> +#include <tuple> +#include <utility> +#include <vector> using namespace llvm; @@ -56,6 +77,7 @@ static cl::opt<bool> DisableHoisting("disable-spill-hoist", cl::Hidden, cl::desc("Disable inline spill hoisting")); namespace { + class HoistSpillHelper : private LiveRangeEdit::Delegate { MachineFunction &MF; LiveIntervals &LIS; @@ -64,7 +86,6 @@ class HoistSpillHelper : private LiveRangeEdit::Delegate { MachineDominatorTree &MDT; MachineLoopInfo &Loops; VirtRegMap &VRM; - MachineFrameInfo &MFI; MachineRegisterInfo &MRI; const TargetInstrInfo &TII; const TargetRegisterInfo &TRI; @@ -72,13 +93,17 @@ class HoistSpillHelper : private LiveRangeEdit::Delegate { InsertPointAnalysis IPA; - // Map from StackSlot to its original register. - DenseMap<int, unsigned> StackSlotToReg; + // Map from StackSlot to the LiveInterval of the original register. + // Note the LiveInterval of the original register may have been deleted + // after it is spilled. We keep a copy here to track the range where + // spills can be moved. 
+ DenseMap<int, std::unique_ptr<LiveInterval>> StackSlotToOrigLI; + // Map from pair of (StackSlot and Original VNI) to a set of spills which // have the same stackslot and have equal values defined by Original VNI. // These spills are mergeable and are hoist candiates. - typedef MapVector<std::pair<int, VNInfo *>, SmallPtrSet<MachineInstr *, 16>> - MergeableSpillsMap; + using MergeableSpillsMap = + MapVector<std::pair<int, VNInfo *>, SmallPtrSet<MachineInstr *, 16>>; MergeableSpillsMap MergeableSpills; /// This is the map from original register to a set containing all its @@ -86,8 +111,8 @@ class HoistSpillHelper : private LiveRangeEdit::Delegate { /// sibling there and use it as the source of the new spill. DenseMap<unsigned, SmallSetVector<unsigned, 16>> Virt2SiblingsMap; - bool isSpillCandBB(unsigned OrigReg, VNInfo &OrigVNI, MachineBasicBlock &BB, - unsigned &LiveReg); + bool isSpillCandBB(LiveInterval &OrigLI, VNInfo &OrigVNI, + MachineBasicBlock &BB, unsigned &LiveReg); void rmRedundantSpills( SmallPtrSet<MachineInstr *, 16> &Spills, @@ -101,7 +126,7 @@ class HoistSpillHelper : private LiveRangeEdit::Delegate { DenseMap<MachineDomTreeNode *, unsigned> &SpillsToKeep, DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill); - void runHoistSpills(unsigned OrigReg, VNInfo &OrigVNI, + void runHoistSpills(LiveInterval &OrigLI, VNInfo &OrigVNI, SmallPtrSet<MachineInstr *, 16> &Spills, SmallVectorImpl<MachineInstr *> &SpillsToRm, DenseMap<MachineBasicBlock *, unsigned> &SpillsToIns); @@ -114,8 +139,7 @@ public: AA(&pass.getAnalysis<AAResultsWrapperPass>().getAAResults()), MDT(pass.getAnalysis<MachineDominatorTree>()), Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm), - MFI(mf.getFrameInfo()), MRI(mf.getRegInfo()), - TII(*mf.getSubtarget().getInstrInfo()), + MRI(mf.getRegInfo()), TII(*mf.getSubtarget().getInstrInfo()), TRI(*mf.getSubtarget().getRegisterInfo()), MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()), IPA(LIS, mf.getNumBlockIDs()) {} @@ -135,7 
+159,6 @@ class InlineSpiller : public Spiller { MachineDominatorTree &MDT; MachineLoopInfo &Loops; VirtRegMap &VRM; - MachineFrameInfo &MFI; MachineRegisterInfo &MRI; const TargetInstrInfo &TII; const TargetRegisterInfo &TRI; @@ -163,7 +186,7 @@ class InlineSpiller : public Spiller { // Object records spills information and does the hoisting. HoistSpillHelper HSpiller; - ~InlineSpiller() override {} + ~InlineSpiller() override = default; public: InlineSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) @@ -172,8 +195,7 @@ public: AA(&pass.getAnalysis<AAResultsWrapperPass>().getAAResults()), MDT(pass.getAnalysis<MachineDominatorTree>()), Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm), - MFI(mf.getFrameInfo()), MRI(mf.getRegInfo()), - TII(*mf.getSubtarget().getInstrInfo()), + MRI(mf.getRegInfo()), TII(*mf.getSubtarget().getInstrInfo()), TRI(*mf.getSubtarget().getRegisterInfo()), MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()), HSpiller(pass, mf, vrm) {} @@ -196,7 +218,7 @@ private: void reMaterializeAll(); bool coalesceStackAccess(MachineInstr *MI, unsigned Reg); - bool foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> >, + bool foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>>, MachineInstr *LoadMI = nullptr); void insertReload(unsigned VReg, SlotIndex, MachineBasicBlock::iterator MI); void insertSpill(unsigned VReg, bool isKill, MachineBasicBlock::iterator MI); @@ -204,19 +226,17 @@ private: void spillAroundUses(unsigned Reg); void spillAll(); }; -} -namespace llvm { +} // end anonymous namespace -Spiller::~Spiller() { } -void Spiller::anchor() { } +Spiller::~Spiller() = default; -Spiller *createInlineSpiller(MachineFunctionPass &pass, - MachineFunction &mf, - VirtRegMap &vrm) { - return new InlineSpiller(pass, mf, vrm); -} +void Spiller::anchor() {} +Spiller *llvm::createInlineSpiller(MachineFunctionPass &pass, + MachineFunction &mf, + VirtRegMap &vrm) { + return new InlineSpiller(pass, mf, vrm); } 
//===----------------------------------------------------------------------===// @@ -340,7 +360,7 @@ bool InlineSpiller::isSibling(unsigned Reg) { /// /// x = def /// spill x -/// y = use x<kill> +/// y = use killed x /// /// This hoist only helps when the copy kills its source. /// @@ -457,7 +477,6 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { } while (!WorkList.empty()); } - //===----------------------------------------------------------------------===// // Rematerialization //===----------------------------------------------------------------------===// @@ -496,7 +515,6 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) { /// reMaterializeFor - Attempt to rematerialize before MI instead of reloading. bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) { - // Analyze instruction SmallVector<std::pair<MachineInstr *, unsigned>, 8> Ops; MIBundleOperands::VirtRegInfo RI = @@ -654,7 +672,6 @@ void InlineSpiller::reMaterializeAll() { DEBUG(dbgs() << RegsToSpill.size() << " registers to spill after remat.\n"); } - //===----------------------------------------------------------------------===// // Spilling //===----------------------------------------------------------------------===// @@ -731,7 +748,7 @@ static void dumpMachineInstrRangeWithSlotIndex(MachineBasicBlock::iterator B, /// @param LoadMI Load instruction to use instead of stack slot when non-null. /// @return True on success. bool InlineSpiller:: -foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, +foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops, MachineInstr *LoadMI) { if (Ops.empty()) return false; @@ -904,7 +921,7 @@ void InlineSpiller::insertSpill(unsigned NewVReg, bool isKill, /// spillAroundUses - insert spill code around each use of Reg. 
void InlineSpiller::spillAroundUses(unsigned Reg) { - DEBUG(dbgs() << "spillAroundUses " << PrintReg(Reg) << '\n'); + DEBUG(dbgs() << "spillAroundUses " << printReg(Reg) << '\n'); LiveInterval &OldLI = LIS.getInterval(Reg); // Iterate over instructions using Reg. @@ -1060,7 +1077,7 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { DEBUG(dbgs() << "Inline spilling " << TRI.getRegClassName(MRI.getRegClass(edit.getReg())) << ':' << edit.getParent() - << "\nFrom original " << PrintReg(Original) << '\n'); + << "\nFrom original " << printReg(Original) << '\n'); assert(edit.getParent().isSpillable() && "Attempting to spill already spilled value."); assert(DeadDefs.empty() && "Previous spill didn't remove dead defs"); @@ -1076,49 +1093,52 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { } /// Optimizations after all the reg selections and spills are done. -/// void InlineSpiller::postOptimization() { HSpiller.hoistAllSpills(); } /// When a spill is inserted, add the spill to MergeableSpills map. -/// void HoistSpillHelper::addToMergeableSpills(MachineInstr &Spill, int StackSlot, unsigned Original) { - StackSlotToReg[StackSlot] = Original; + BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator(); + LiveInterval &OrigLI = LIS.getInterval(Original); + // save a copy of LiveInterval in StackSlotToOrigLI because the original + // LiveInterval may be cleared after all its references are spilled. 
+ if (StackSlotToOrigLI.find(StackSlot) == StackSlotToOrigLI.end()) { + auto LI = llvm::make_unique<LiveInterval>(OrigLI.reg, OrigLI.weight); + LI->assign(OrigLI, Allocator); + StackSlotToOrigLI[StackSlot] = std::move(LI); + } SlotIndex Idx = LIS.getInstructionIndex(Spill); - VNInfo *OrigVNI = LIS.getInterval(Original).getVNInfoAt(Idx.getRegSlot()); + VNInfo *OrigVNI = StackSlotToOrigLI[StackSlot]->getVNInfoAt(Idx.getRegSlot()); std::pair<int, VNInfo *> MIdx = std::make_pair(StackSlot, OrigVNI); MergeableSpills[MIdx].insert(&Spill); } /// When a spill is removed, remove the spill from MergeableSpills map. /// Return true if the spill is removed successfully. -/// bool HoistSpillHelper::rmFromMergeableSpills(MachineInstr &Spill, int StackSlot) { - int Original = StackSlotToReg[StackSlot]; - if (!Original) + auto It = StackSlotToOrigLI.find(StackSlot); + if (It == StackSlotToOrigLI.end()) return false; SlotIndex Idx = LIS.getInstructionIndex(Spill); - VNInfo *OrigVNI = LIS.getInterval(Original).getVNInfoAt(Idx.getRegSlot()); + VNInfo *OrigVNI = It->second->getVNInfoAt(Idx.getRegSlot()); std::pair<int, VNInfo *> MIdx = std::make_pair(StackSlot, OrigVNI); return MergeableSpills[MIdx].erase(&Spill); } /// Check BB to see if it is a possible target BB to place a hoisted spill, /// i.e., there should be a living sibling of OrigReg at the insert point. 
-/// -bool HoistSpillHelper::isSpillCandBB(unsigned OrigReg, VNInfo &OrigVNI, +bool HoistSpillHelper::isSpillCandBB(LiveInterval &OrigLI, VNInfo &OrigVNI, MachineBasicBlock &BB, unsigned &LiveReg) { SlotIndex Idx; - LiveInterval &OrigLI = LIS.getInterval(OrigReg); + unsigned OrigReg = OrigLI.reg; MachineBasicBlock::iterator MI = IPA.getLastInsertPointIter(OrigLI, BB); if (MI != BB.end()) Idx = LIS.getInstructionIndex(*MI); else Idx = LIS.getMBBEndIdx(&BB).getPrevSlot(); SmallSetVector<unsigned, 16> &Siblings = Virt2SiblingsMap[OrigReg]; - assert((LIS.getInterval(OrigReg)).getVNInfoAt(Idx) == &OrigVNI && - "Unexpected VNI"); + assert(OrigLI.getVNInfoAt(Idx) == &OrigVNI && "Unexpected VNI"); for (auto const SibReg : Siblings) { LiveInterval &LI = LIS.getInterval(SibReg); @@ -1133,7 +1153,6 @@ bool HoistSpillHelper::isSpillCandBB(unsigned OrigReg, VNInfo &OrigVNI, /// Remove redundant spills in the same BB. Save those redundant spills in /// SpillsToRm, and save the spill to keep and its BB in SpillBBToSpill map. -/// void HoistSpillHelper::rmRedundantSpills( SmallPtrSet<MachineInstr *, 16> &Spills, SmallVectorImpl<MachineInstr *> &SpillsToRm, @@ -1166,7 +1185,6 @@ void HoistSpillHelper::rmRedundantSpills( /// time. \p SpillBBToSpill will be populated as part of the process and /// maps a basic block to the first store occurring in the basic block. /// \post SpillsToRm.union(Spills\@post) == Spills\@pre -/// void HoistSpillHelper::getVisitOrders( MachineBasicBlock *Root, SmallPtrSet<MachineInstr *, 16> &Spills, SmallVectorImpl<MachineDomTreeNode *> &Orders, @@ -1254,9 +1272,9 @@ void HoistSpillHelper::getVisitOrders( /// Try to hoist spills according to BB hotness. The spills to removed will /// be saved in \p SpillsToRm. The spills to be inserted will be saved in /// \p SpillsToIns. 
-/// void HoistSpillHelper::runHoistSpills( - unsigned OrigReg, VNInfo &OrigVNI, SmallPtrSet<MachineInstr *, 16> &Spills, + LiveInterval &OrigLI, VNInfo &OrigVNI, + SmallPtrSet<MachineInstr *, 16> &Spills, SmallVectorImpl<MachineInstr *> &SpillsToRm, DenseMap<MachineBasicBlock *, unsigned> &SpillsToIns) { // Visit order of dominator tree nodes. @@ -1280,9 +1298,10 @@ void HoistSpillHelper::runHoistSpills( // nodes set and the cost of all the spills inside those nodes. // The nodes set are the locations where spills are to be inserted // in the subtree of current node. - typedef std::pair<SmallPtrSet<MachineDomTreeNode *, 16>, BlockFrequency> - NodesCostPair; + using NodesCostPair = + std::pair<SmallPtrSet<MachineDomTreeNode *, 16>, BlockFrequency>; DenseMap<MachineDomTreeNode *, NodesCostPair> SpillsInSubTreeMap; + // Iterate Orders set in reverse order, which will be a bottom-up order // in the dominator tree. Once we visit a dom tree node, we know its // children have already been visited and the spill locations in the @@ -1331,7 +1350,7 @@ void HoistSpillHelper::runHoistSpills( // Check whether Block is a possible candidate to insert spill. unsigned LiveReg = 0; - if (!isSpillCandBB(OrigReg, OrigVNI, *Block, LiveReg)) + if (!isSpillCandBB(OrigLI, OrigVNI, *Block, LiveReg)) continue; // If there are multiple spills that could be merged, bias a little @@ -1391,18 +1410,12 @@ void HoistSpillHelper::runHoistSpills( /// bottom-up order, and for each node, we will decide whether to hoist spills /// inside its subtree to that node. In this way, we can get benefit locally /// even if hoisting all the equal spills to one cold place is impossible. -/// void HoistSpillHelper::hoistAllSpills() { SmallVector<unsigned, 4> NewVRegs; LiveRangeEdit Edit(nullptr, NewVRegs, MF, LIS, &VRM, this); - // Save the mapping between stackslot and its original reg. 
- DenseMap<int, unsigned> SlotToOrigReg; for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); - int Slot = VRM.getStackSlot(Reg); - if (Slot != VirtRegMap::NO_STACK_SLOT) - SlotToOrigReg[Slot] = VRM.getOriginal(Reg); unsigned Original = VRM.getPreSplitReg(Reg); if (!MRI.def_empty(Reg)) Virt2SiblingsMap[Original].insert(Reg); @@ -1411,8 +1424,7 @@ void HoistSpillHelper::hoistAllSpills() { // Each entry in MergeableSpills contains a spill set with equal values. for (auto &Ent : MergeableSpills) { int Slot = Ent.first.first; - unsigned OrigReg = SlotToOrigReg[Slot]; - LiveInterval &OrigLI = LIS.getInterval(OrigReg); + LiveInterval &OrigLI = *StackSlotToOrigLI[Slot]; VNInfo *OrigVNI = Ent.first.second; SmallPtrSet<MachineInstr *, 16> &EqValSpills = Ent.second; if (Ent.second.empty()) @@ -1431,7 +1443,7 @@ void HoistSpillHelper::hoistAllSpills() { // SpillsToIns is the spill set to be newly inserted after hoisting. DenseMap<MachineBasicBlock *, unsigned> SpillsToIns; - runHoistSpills(OrigReg, *OrigVNI, EqValSpills, SpillsToRm, SpillsToIns); + runHoistSpills(OrigLI, *OrigVNI, EqValSpills, SpillsToRm, SpillsToIns); DEBUG({ dbgs() << "Finally inserted spills in BB: "; diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp index f8cc24724580..72227cc7bba9 100644 --- a/lib/CodeGen/InterferenceCache.cpp +++ b/lib/CodeGen/InterferenceCache.cpp @@ -1,4 +1,4 @@ -//===-- InterferenceCache.cpp - Caching per-block interference ---------*--===// +//===- InterferenceCache.cpp - Caching per-block interference -------------===// // // The LLVM Compiler Infrastructure // @@ -12,9 +12,21 @@ //===----------------------------------------------------------------------===// #include "InterferenceCache.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveIntervalUnion.h" +#include 
"llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include <cassert> +#include <cstdint> +#include <cstdlib> +#include <tuple> using namespace llvm; @@ -149,7 +161,7 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { BlockInterference *BI = &Blocks[MBBNum]; ArrayRef<SlotIndex> RegMaskSlots; ArrayRef<const uint32_t*> RegMaskBits; - for (;;) { + while (true) { BI->Tag = Tag; BI->First = BI->Last = SlotIndex(); diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h index 18aa5c7c5ad6..160e2b16e294 100644 --- a/lib/CodeGen/InterferenceCache.h +++ b/lib/CodeGen/InterferenceCache.h @@ -1,4 +1,4 @@ -//===-- InterferenceCache.h - Caching per-block interference ---*- C++ -*--===// +//===- InterferenceCache.h - Caching per-block interference ----*- C++ -*--===// // // The LLVM Compiler Infrastructure // @@ -15,47 +15,53 @@ #ifndef LLVM_LIB_CODEGEN_INTERFERENCECACHE_H #define LLVM_LIB_CODEGEN_INTERFERENCECACHE_H +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervalUnion.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/Support/Compiler.h" +#include <cassert> +#include <cstddef> +#include <cstdlib> namespace llvm { class LiveIntervals; +class MachineFunction; +class TargetRegisterInfo; class LLVM_LIBRARY_VISIBILITY InterferenceCache { - const TargetRegisterInfo *TRI; - LiveIntervalUnion *LIUArray; - MachineFunction *MF; - /// BlockInterference - information about the interference in a single basic /// block. 
struct BlockInterference { - BlockInterference() : Tag(0) {} - unsigned Tag; + unsigned Tag = 0; SlotIndex First; SlotIndex Last; + + BlockInterference() {} }; /// Entry - A cache entry containing interference information for all aliases /// of PhysReg in all basic blocks. class Entry { /// PhysReg - The register currently represented. - unsigned PhysReg; + unsigned PhysReg = 0; /// Tag - Cache tag is changed when any of the underlying LiveIntervalUnions /// change. - unsigned Tag; + unsigned Tag = 0; /// RefCount - The total number of Cursor instances referring to this Entry. - unsigned RefCount; + unsigned RefCount = 0; /// MF - The current function. MachineFunction *MF; /// Indexes - Mapping block numbers to SlotIndex ranges. - SlotIndexes *Indexes; + SlotIndexes *Indexes = nullptr; /// LIS - Used for accessing register mask interference maps. - LiveIntervals *LIS; + LiveIntervals *LIS = nullptr; /// PrevPos - The previous position the iterators were moved to. SlotIndex PrevPos; @@ -72,13 +78,12 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache { unsigned VirtTag; /// Fixed interference in RegUnit. - LiveRange *Fixed; + LiveRange *Fixed = nullptr; /// Iterator pointing into the fixed RegUnit interference. LiveInterval::iterator FixedI; - RegUnitInfo(LiveIntervalUnion &LIU) - : VirtTag(LIU.getTag()), Fixed(nullptr) { + RegUnitInfo(LiveIntervalUnion &LIU) : VirtTag(LIU.getTag()) { VirtI.setMap(LIU.getMap()); } }; @@ -94,7 +99,7 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache { void update(unsigned MBBNum); public: - Entry() : PhysReg(0), Tag(0), RefCount(0), Indexes(nullptr), LIS(nullptr) {} + Entry() = default; void clear(MachineFunction *mf, SlotIndexes *indexes, LiveIntervals *lis) { assert(!hasRefs() && "Cannot clear cache entry with references"); @@ -134,13 +139,17 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache { // robin manner. 
enum { CacheEntries = 32 }; + const TargetRegisterInfo *TRI = nullptr; + LiveIntervalUnion *LIUArray = nullptr; + MachineFunction *MF = nullptr; + // Point to an entry for each physreg. The entry pointed to may not be up to // date, and it may have been reused for a different physreg. - unsigned char* PhysRegEntries; - size_t PhysRegEntriesCount; + unsigned char* PhysRegEntries = nullptr; + size_t PhysRegEntriesCount = 0; // Next round-robin entry to be picked. - unsigned RoundRobin; + unsigned RoundRobin = 0; // The actual cache entries. Entry Entries[CacheEntries]; @@ -149,9 +158,9 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache { Entry *get(unsigned PhysReg); public: - InterferenceCache() - : TRI(nullptr), LIUArray(nullptr), MF(nullptr), PhysRegEntries(nullptr), - PhysRegEntriesCount(0), RoundRobin(0) {} + friend class Cursor; + + InterferenceCache() = default; ~InterferenceCache() { free(PhysRegEntries); @@ -160,8 +169,9 @@ public: void reinitPhysRegEntries(); /// init - Prepare cache for a new function. - void init(MachineFunction*, LiveIntervalUnion*, SlotIndexes*, LiveIntervals*, - const TargetRegisterInfo *); + void init(MachineFunction *mf, LiveIntervalUnion *liuarray, + SlotIndexes *indexes, LiveIntervals *lis, + const TargetRegisterInfo *tri); /// getMaxCursors - Return the maximum number of concurrent cursors that can /// be supported. @@ -169,8 +179,8 @@ public: /// Cursor - The primary query interface for the block interference cache. class Cursor { - Entry *CacheEntry; - const BlockInterference *Current; + Entry *CacheEntry = nullptr; + const BlockInterference *Current = nullptr; static const BlockInterference NoInterference; void setEntry(Entry *E) { @@ -186,10 +196,9 @@ public: public: /// Cursor - Create a dangling cursor. 
- Cursor() : CacheEntry(nullptr), Current(nullptr) {} - ~Cursor() { setEntry(nullptr); } + Cursor() = default; - Cursor(const Cursor &O) : CacheEntry(nullptr), Current(nullptr) { + Cursor(const Cursor &O) { setEntry(O.CacheEntry); } @@ -198,6 +207,8 @@ public: return *this; } + ~Cursor() { setEntry(nullptr); } + /// setPhysReg - Point this cursor to PhysReg's interference. void setPhysReg(InterferenceCache &Cache, unsigned PhysReg) { // Release reference before getting a new one. That guarantees we can @@ -229,10 +240,8 @@ public: return Current->Last; } }; - - friend class Cursor; }; -} // namespace llvm +} // end namespace llvm -#endif +#endif // LLVM_LIB_CODEGEN_INTERFERENCECACHE_H diff --git a/lib/CodeGen/InterleavedAccessPass.cpp b/lib/CodeGen/InterleavedAccessPass.cpp index ee4929c91482..9c906d309639 100644 --- a/lib/CodeGen/InterleavedAccessPass.cpp +++ b/lib/CodeGen/InterleavedAccessPass.cpp @@ -1,4 +1,4 @@ -//===--------------------- InterleavedAccessPass.cpp ----------------------===// +//===- InterleavedAccessPass.cpp ------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -42,17 +42,32 @@ // // Similarly, a set of interleaved stores can be transformed into an optimized // sequence of shuffles followed by a set of target specific stores for X86. 
+// //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Target/TargetMachine.h" +#include <cassert> +#include <utility> using namespace llvm; @@ -66,10 +81,10 @@ static cl::opt<bool> LowerInterleavedAccesses( namespace { class InterleavedAccess : public FunctionPass { - public: static char ID; - InterleavedAccess() : FunctionPass(ID), DT(nullptr), TLI(nullptr) { + + InterleavedAccess() : FunctionPass(ID) { initializeInterleavedAccessPass(*PassRegistry::getPassRegistry()); } @@ -83,8 +98,8 @@ public: } private: - DominatorTree *DT; - const TargetLowering *TLI; + DominatorTree *DT = nullptr; + const TargetLowering *TLI = nullptr; /// The maximum supported interleave factor. unsigned MaxFactor; @@ -104,9 +119,11 @@ private: bool tryReplaceExtracts(ArrayRef<ExtractElementInst *> Extracts, ArrayRef<ShuffleVectorInst *> Shuffles); }; + } // end anonymous namespace. 
char InterleavedAccess::ID = 0; + INITIALIZE_PASS_BEGIN(InterleavedAccess, DEBUG_TYPE, "Lower interleaved memory accesses to target specific intrinsics", false, false) @@ -331,7 +348,6 @@ bool InterleavedAccess::lowerInterleavedLoad( bool InterleavedAccess::tryReplaceExtracts( ArrayRef<ExtractElementInst *> Extracts, ArrayRef<ShuffleVectorInst *> Shuffles) { - // If there aren't any extractelement instructions to modify, there's nothing // to do. if (Extracts.empty()) @@ -342,7 +358,6 @@ bool InterleavedAccess::tryReplaceExtracts( DenseMap<ExtractElementInst *, std::pair<Value *, int>> ReplacementMap; for (auto *Extract : Extracts) { - // The vector index that is extracted. auto *IndexOperand = cast<ConstantInt>(Extract->getIndexOperand()); auto Index = IndexOperand->getSExtValue(); @@ -351,7 +366,6 @@ bool InterleavedAccess::tryReplaceExtracts( // extractelement instruction (which uses an interleaved load) to use one // of the shufflevector instructions instead of the load. for (auto *Shuffle : Shuffles) { - // If the shufflevector instruction doesn't dominate the extract, we // can't create a use of it. if (!DT->dominates(Shuffle, Extract)) diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index c6cc909e25d3..12777d5ed110 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -57,10 +57,10 @@ static void EnsureFPIntrinsicsExist(Module &M, Function &Fn, } } -/// ReplaceCallWith - This function is used when we want to lower an intrinsic -/// call to a call of an external function. This handles hard cases such as -/// when there was already a prototype for the external function, and if that -/// prototype doesn't match the arguments we expect to pass in. +/// This function is used when we want to lower an intrinsic call to a call of +/// an external function. 
This handles hard cases such as when there was already +/// a prototype for the external function, but that prototype doesn't match the +/// arguments we expect to pass in. template <class ArgIt> static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI, ArgIt ArgBegin, ArgIt ArgEnd, @@ -161,12 +161,11 @@ void IntrinsicLowering::AddPrototypes(Module &M) { } } -/// LowerBSWAP - Emit the code to lower bswap of V before the specified -/// instruction IP. +/// Emit the code to lower bswap of V before the specified instruction IP. static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) { - assert(V->getType()->isIntegerTy() && "Can't bswap a non-integer type!"); + assert(V->getType()->isIntOrIntVectorTy() && "Can't bswap a non-integer type!"); - unsigned BitSize = V->getType()->getPrimitiveSizeInBits(); + unsigned BitSize = V->getType()->getScalarSizeInBits(); IRBuilder<> Builder(IP); @@ -190,10 +189,10 @@ static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) { Value *Tmp1 = Builder.CreateLShr(V,ConstantInt::get(V->getType(), 24), "bswap.1"); Tmp3 = Builder.CreateAnd(Tmp3, - ConstantInt::get(Type::getInt32Ty(Context), 0xFF0000), + ConstantInt::get(V->getType(), 0xFF0000), "bswap.and3"); Tmp2 = Builder.CreateAnd(Tmp2, - ConstantInt::get(Type::getInt32Ty(Context), 0xFF00), + ConstantInt::get(V->getType(), 0xFF00), "bswap.and2"); Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or1"); Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or2"); @@ -221,27 +220,27 @@ static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) { ConstantInt::get(V->getType(), 56), "bswap.1"); Tmp7 = Builder.CreateAnd(Tmp7, - ConstantInt::get(Type::getInt64Ty(Context), + ConstantInt::get(V->getType(), 0xFF000000000000ULL), "bswap.and7"); Tmp6 = Builder.CreateAnd(Tmp6, - ConstantInt::get(Type::getInt64Ty(Context), + ConstantInt::get(V->getType(), 0xFF0000000000ULL), "bswap.and6"); Tmp5 = Builder.CreateAnd(Tmp5, - 
ConstantInt::get(Type::getInt64Ty(Context), + ConstantInt::get(V->getType(), 0xFF00000000ULL), "bswap.and5"); Tmp4 = Builder.CreateAnd(Tmp4, - ConstantInt::get(Type::getInt64Ty(Context), + ConstantInt::get(V->getType(), 0xFF000000ULL), "bswap.and4"); Tmp3 = Builder.CreateAnd(Tmp3, - ConstantInt::get(Type::getInt64Ty(Context), + ConstantInt::get(V->getType(), 0xFF0000ULL), "bswap.and3"); Tmp2 = Builder.CreateAnd(Tmp2, - ConstantInt::get(Type::getInt64Ty(Context), + ConstantInt::get(V->getType(), 0xFF00ULL), "bswap.and2"); Tmp8 = Builder.CreateOr(Tmp8, Tmp7, "bswap.or1"); @@ -257,8 +256,7 @@ static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) { return V; } -/// LowerCTPOP - Emit the code to lower ctpop of V before the specified -/// instruction IP. +/// Emit the code to lower ctpop of V before the specified instruction IP. static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) { assert(V->getType()->isIntegerTy() && "Can't ctpop a non-integer type!"); @@ -297,8 +295,7 @@ static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) { return Count; } -/// LowerCTLZ - Emit the code to lower ctlz of V before the specified -/// instruction IP. +/// Emit the code to lower ctlz of V before the specified instruction IP. 
static Value *LowerCTLZ(LLVMContext &Context, Value *V, Instruction *IP) { IRBuilder<> Builder(IP); diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index f2defb4fd623..92edfb059ad6 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -16,11 +16,12 @@ #include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetLoweringObjectFile.h" #include "llvm/CodeGen/TargetPassConfig.h" -#include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/LegacyPassManager.h" -#include "llvm/IR/Verifier.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCStreamer.h" @@ -29,10 +30,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Transforms/Scalar.h" using namespace llvm; void LLVMTargetMachine::initAsmInfo() { @@ -77,7 +76,6 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) : TargetMachine(T, DataLayoutString, TT, CPU, FS, Options) { - T.adjustCodeGenOpts(TT, RM, CM); this->RM = RM; this->CMModel = CM; this->OptLevel = OL; @@ -92,24 +90,25 @@ TargetIRAnalysis LLVMTargetMachine::getTargetIRAnalysis() { /// addPassesToX helper drives creation and initialization of TargetPassConfig. 
static MCContext * addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM, - bool DisableVerify, AnalysisID StartBefore, - AnalysisID StartAfter, AnalysisID StopBefore, - AnalysisID StopAfter) { + bool DisableVerify, bool &WillCompleteCodeGenPipeline, + raw_pwrite_stream &Out, MachineModuleInfo *MMI) { // Targets may override createPassConfig to provide a target-specific // subclass. TargetPassConfig *PassConfig = TM->createPassConfig(PM); - PassConfig->setStartStopPasses(StartBefore, StartAfter, StopBefore, - StopAfter); // Set PassConfig options provided by TargetMachine. PassConfig->setDisableVerify(DisableVerify); + WillCompleteCodeGenPipeline = PassConfig->willCompleteCodeGenPipeline(); PM.add(PassConfig); - MachineModuleInfo *MMI = new MachineModuleInfo(TM); + if (!MMI) + MMI = new MachineModuleInfo(TM); PM.add(MMI); if (PassConfig->addISelPasses()) return nullptr; PassConfig->addMachinePasses(); PassConfig->setInitialized(); + if (!WillCompleteCodeGenPipeline) + PM.add(createPrintMIRPass(Out)); return &MMI->getContext(); } @@ -163,7 +162,8 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM, Triple T(getTargetTriple().str()); AsmStreamer.reset(getTarget().createMCObjectStreamer( - T, Context, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll, + T, Context, std::unique_ptr<MCAsmBackend>(MAB), Out, + std::unique_ptr<MCCodeEmitter>(MCE), STI, Options.MCOptions.MCRelaxAll, Options.MCOptions.MCIncrementalLinkerCompatible, /*DWARFMustBeAtTheEnd*/ true)); break; @@ -185,23 +185,20 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM, return false; } -bool LLVMTargetMachine::addPassesToEmitFile( - PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType, - bool DisableVerify, AnalysisID StartBefore, AnalysisID StartAfter, - AnalysisID StopBefore, AnalysisID StopAfter) { +bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, + raw_pwrite_stream &Out, + CodeGenFileType FileType, + bool DisableVerify, + 
MachineModuleInfo *MMI) { // Add common CodeGen passes. - MCContext *Context = - addPassesToGenerateCode(this, PM, DisableVerify, StartBefore, StartAfter, - StopBefore, StopAfter); + bool WillCompleteCodeGenPipeline = true; + MCContext *Context = addPassesToGenerateCode( + this, PM, DisableVerify, WillCompleteCodeGenPipeline, Out, MMI); if (!Context) return true; - if (StopBefore || StopAfter) { - PM.add(createPrintMIRPass(Out)); - } else { - if (addAsmPrinter(PM, Out, FileType, *Context)) - return true; - } + if (WillCompleteCodeGenPipeline && addAsmPrinter(PM, Out, FileType, *Context)) + return true; PM.add(createFreeMachineFunctionPass()); return false; @@ -216,10 +213,13 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, raw_pwrite_stream &Out, bool DisableVerify) { // Add common CodeGen passes. - Ctx = addPassesToGenerateCode(this, PM, DisableVerify, nullptr, nullptr, - nullptr, nullptr); + bool WillCompleteCodeGenPipeline = true; + Ctx = addPassesToGenerateCode(this, PM, DisableVerify, + WillCompleteCodeGenPipeline, Out, + /*MachineModuleInfo*/ nullptr); if (!Ctx) return true; + assert(WillCompleteCodeGenPipeline && "CodeGen pipeline has been altered"); if (Options.MCOptions.MCSaveTempLabels) Ctx->setAllowTemporaryLabels(false); @@ -238,7 +238,8 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, const Triple &T = getTargetTriple(); const MCSubtargetInfo &STI = *getMCSubtargetInfo(); std::unique_ptr<MCStreamer> AsmStreamer(getTarget().createMCObjectStreamer( - T, *Ctx, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll, + T, *Ctx, std::unique_ptr<MCAsmBackend>(MAB), Out, + std::unique_ptr<MCCodeEmitter>(MCE), STI, Options.MCOptions.MCRelaxAll, Options.MCOptions.MCIncrementalLinkerCompatible, /*DWARFMustBeAtTheEnd*/ true)); diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp index 86ef898932a7..8ffd51a550fc 100644 --- a/lib/CodeGen/LatencyPriorityQueue.cpp +++ 
b/lib/CodeGen/LatencyPriorityQueue.cpp @@ -134,6 +134,7 @@ SUnit *LatencyPriorityQueue::pop() { void LatencyPriorityQueue::remove(SUnit *SU) { assert(!Queue.empty() && "Queue is empty!"); std::vector<SUnit *>::iterator I = find(Queue, SU); + assert(I != Queue.end() && "Queue doesn't contain the SU being removed!"); if (I != std::prev(Queue.end())) std::swap(*I, Queue.back()); Queue.pop_back(); diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp index 995c58a63564..8c54751ee833 100644 --- a/lib/CodeGen/LexicalScopes.cpp +++ b/lib/CodeGen/LexicalScopes.cpp @@ -47,11 +47,11 @@ void LexicalScopes::reset() { /// initialize - Scan machine function and constuct lexical scope nest. void LexicalScopes::initialize(const MachineFunction &Fn) { + reset(); // Don't attempt any lexical scope creation for a NoDebug compile unit. - if (Fn.getFunction()->getSubprogram()->getUnit()->getEmissionKind() == + if (Fn.getFunction().getSubprogram()->getUnit()->getEmissionKind() == DICompileUnit::NoDebug) return; - reset(); MF = &Fn; SmallVector<InsnRange, 4> MIRanges; DenseMap<const MachineInstr *, LexicalScope *> MI2ScopeMap; @@ -173,7 +173,7 @@ LexicalScopes::getOrCreateRegularScope(const DILocalScope *Scope) { false)).first; if (!Parent) { - assert(cast<DISubprogram>(Scope)->describes(MF->getFunction())); + assert(cast<DISubprogram>(Scope)->describes(&MF->getFunction())); assert(!CurrentFnLexicalScope); CurrentFnLexicalScope = &I->second; } @@ -277,7 +277,9 @@ void LexicalScopes::assignInstructionRanges( /// DebugLoc. 
void LexicalScopes::getMachineBasicBlocks( const DILocation *DL, SmallPtrSetImpl<const MachineBasicBlock *> &MBBs) { + assert(MF && "Method called on a uninitialized LexicalScopes object!"); MBBs.clear(); + LexicalScope *Scope = getOrCreateLexicalScope(DL); if (!Scope) return; @@ -296,6 +298,7 @@ void LexicalScopes::getMachineBasicBlocks( /// dominates - Return true if DebugLoc's lexical scope dominates at least one /// machine instruction's lexical scope in a given machine basic block. bool LexicalScopes::dominates(const DILocation *DL, MachineBasicBlock *MBB) { + assert(MF && "Unexpected uninitialized LexicalScopes object!"); LexicalScope *Scope = getOrCreateLexicalScope(DL); if (!Scope) return false; diff --git a/lib/CodeGen/LiveDebugValues.cpp b/lib/CodeGen/LiveDebugValues.cpp index b5e705f6455d..19ec281079cb 100644 --- a/lib/CodeGen/LiveDebugValues.cpp +++ b/lib/CodeGen/LiveDebugValues.cpp @@ -1,4 +1,4 @@ -//===------ LiveDebugValues.cpp - Tracking Debug Value MIs ----------------===// +//===- LiveDebugValues.cpp - Tracking Debug Value MIs ---------------------===// // // The LLVM Compiler Infrastructure // @@ -18,28 +18,45 @@ /// //===----------------------------------------------------------------------===// +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/UniqueVector.h" #include "llvm/CodeGen/LexicalScopes.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include 
"llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" -#include <list> +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <functional> #include <queue> +#include <utility> +#include <vector> using namespace llvm; @@ -47,8 +64,6 @@ using namespace llvm; STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted"); -namespace { - // \brief If @MI is a DBG_VALUE with debug value described by a defined // register, returns the number of this register. In the other case, returns 0. static unsigned isDbgValueDescribedByReg(const MachineInstr &MI) { @@ -59,8 +74,9 @@ static unsigned isDbgValueDescribedByReg(const MachineInstr &MI) { return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0; } -class LiveDebugValues : public MachineFunctionPass { +namespace { +class LiveDebugValues : public MachineFunctionPass { private: const TargetRegisterInfo *TRI; const TargetInstrInfo *TII; @@ -87,15 +103,15 @@ private: }; /// Based on std::pair so it can be used as an index into a DenseMap. - typedef std::pair<const DILocalVariable *, const DILocation *> - DebugVariableBase; + using DebugVariableBase = + std::pair<const DILocalVariable *, const DILocation *>; /// A potentially inlined instance of a variable. 
struct DebugVariable : public DebugVariableBase { DebugVariable(const DILocalVariable *Var, const DILocation *InlinedAt) : DebugVariableBase(Var, InlinedAt) {} - const DILocalVariable *getVar() const { return this->first; }; - const DILocation *getInlinedAt() const { return this->second; }; + const DILocalVariable *getVar() const { return this->first; } + const DILocation *getInlinedAt() const { return this->second; } bool operator<(const DebugVariable &DV) const { if (getVar() == DV.getVar()) @@ -109,38 +125,25 @@ private: const DebugVariable Var; const MachineInstr &MI; ///< Only used for cloning a new DBG_VALUE. mutable UserValueScopes UVS; - enum { InvalidKind = 0, RegisterKind } Kind; + enum { InvalidKind = 0, RegisterKind } Kind = InvalidKind; /// The value location. Stored separately to avoid repeatedly /// extracting it from MI. union { - struct { - uint32_t RegNo; - uint32_t Offset; - } RegisterLoc; + uint64_t RegNo; uint64_t Hash; } Loc; VarLoc(const MachineInstr &MI, LexicalScopes &LS) : Var(MI.getDebugVariable(), MI.getDebugLoc()->getInlinedAt()), MI(MI), - UVS(MI.getDebugLoc(), LS), Kind(InvalidKind) { + UVS(MI.getDebugLoc(), LS) { static_assert((sizeof(Loc) == sizeof(uint64_t)), "hash does not cover all members of Loc"); assert(MI.isDebugValue() && "not a DBG_VALUE"); assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE"); if (int RegNo = isDbgValueDescribedByReg(MI)) { Kind = RegisterKind; - Loc.RegisterLoc.RegNo = RegNo; - int64_t Offset = - MI.isIndirectDebugValue() ? MI.getOperand(1).getImm() : 0; - // We don't support offsets larger than 4GiB here. They are - // slated to be replaced with DIExpressions anyway. - // With indirect debug values used for spill locations, Offset - // can be negative. - if (Offset == INT64_MIN || std::abs(Offset) >= (1LL << 32)) - Kind = InvalidKind; - else - Loc.RegisterLoc.Offset = Offset; + Loc.RegNo = RegNo; } } @@ -148,7 +151,7 @@ private: /// otherwise return 0. 
unsigned isDescribedByReg() const { if (Kind == RegisterKind) - return Loc.RegisterLoc.RegNo; + return Loc.RegNo; return 0; } @@ -172,14 +175,14 @@ private: } }; - typedef UniqueVector<VarLoc> VarLocMap; - typedef SparseBitVector<> VarLocSet; - typedef SmallDenseMap<const MachineBasicBlock *, VarLocSet> VarLocInMBB; + using VarLocMap = UniqueVector<VarLoc>; + using VarLocSet = SparseBitVector<>; + using VarLocInMBB = SmallDenseMap<const MachineBasicBlock *, VarLocSet>; struct SpillDebugPair { MachineInstr *SpillInst; MachineInstr *DebugInst; }; - typedef SmallVector<SpillDebugPair, 4> SpillMap; + using SpillMap = SmallVector<SpillDebugPair, 4>; /// This holds the working set of currently open ranges. For fast /// access, this is done both as a set of VarLocIDs, and a map of @@ -275,14 +278,16 @@ public: bool runOnMachineFunction(MachineFunction &MF) override; }; -} // namespace +} // end anonymous namespace //===----------------------------------------------------------------------===// // Implementation //===----------------------------------------------------------------------===// char LiveDebugValues::ID = 0; + char &llvm::LiveDebugValuesID = LiveDebugValues::ID; + INITIALIZE_PASS(LiveDebugValues, DEBUG_TYPE, "Live DEBUG_VALUE analysis", false, false) @@ -368,7 +373,7 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI, void LiveDebugValues::transferRegisterDef(MachineInstr &MI, OpenRangesSet &OpenRanges, const VarLocMap &VarLocIDs) { - MachineFunction *MF = MI.getParent()->getParent(); + MachineFunction *MF = MI.getMF(); const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); SparseBitVector<> KillSet; @@ -444,14 +449,14 @@ void LiveDebugValues::transferSpillInst(MachineInstr &MI, VarLocMap &VarLocIDs, SpillMap &Spills) { unsigned Reg; - MachineFunction *MF = MI.getParent()->getParent(); + MachineFunction *MF = MI.getMF(); if (!isSpillInstruction(MI, MF, Reg)) return; // 
Check if the register is the location of a debug value. for (unsigned ID : OpenRanges.getVarLocs()) { if (VarLocIDs[ID].isDescribedByReg() == Reg) { - DEBUG(dbgs() << "Spilling Register " << PrintReg(Reg, TRI) << '(' + DEBUG(dbgs() << "Spilling Register " << printReg(Reg, TRI) << '(' << VarLocIDs[ID].Var.getVar()->getName() << ")\n"); // Create a DBG_VALUE instruction to describe the Var in its spilled @@ -460,10 +465,11 @@ void LiveDebugValues::transferSpillInst(MachineInstr &MI, unsigned SpillBase; int SpillOffset = extractSpillBaseRegAndOffset(MI, SpillBase); const MachineInstr *DMI = &VarLocIDs[ID].MI; + auto *SpillExpr = DIExpression::prepend( + DMI->getDebugExpression(), DIExpression::NoDeref, SpillOffset); MachineInstr *SpDMI = - BuildMI(*MF, DMI->getDebugLoc(), DMI->getDesc(), true, SpillBase, 0, - DMI->getDebugVariable(), DMI->getDebugExpression()); - SpDMI->getOperand(1).setImm(SpillOffset); + BuildMI(*MF, DMI->getDebugLoc(), DMI->getDesc(), true, SpillBase, + DMI->getDebugVariable(), SpillExpr); DEBUG(dbgs() << "Creating DBG_VALUE inst for spill: "; SpDMI->print(dbgs(), false, TII)); @@ -582,7 +588,7 @@ bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, const MachineInstr *DMI = &DiffIt.MI; MachineInstr *MI = BuildMI(MBB, MBB.instr_begin(), DMI->getDebugLoc(), DMI->getDesc(), - DMI->isIndirectDebugValue(), DMI->getOperand(0).getReg(), 0, + DMI->isIndirectDebugValue(), DMI->getOperand(0).getReg(), DMI->getDebugVariable(), DMI->getDebugExpression()); if (DMI->isIndirectDebugValue()) MI->getOperand(1).setImm(DMI->getOperand(1).getImm()); @@ -597,7 +603,6 @@ bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, /// Calculate the liveness information for the given machine function and /// extend ranges across basic blocks. 
bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { - DEBUG(dbgs() << "\nDebug Range Extension\n"); bool Changed = false; @@ -698,10 +703,15 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { } bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) { - if (!MF.getFunction()->getSubprogram()) + if (!MF.getFunction().getSubprogram()) // LiveDebugValues will already have removed all DBG_VALUEs. return false; + // Skip functions from NoDebug compilation units. + if (MF.getFunction().getSubprogram()->getUnit()->getEmissionKind() == + DICompileUnit::NoDebug) + return false; + TRI = MF.getSubtarget().getRegisterInfo(); TII = MF.getSubtarget().getInstrInfo(); TFI = MF.getSubtarget().getFrameLowering(); diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index 0c76478af551..34572f24c181 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -20,26 +20,44 @@ //===----------------------------------------------------------------------===// #include "LiveDebugVariables.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IntervalMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/LexicalScopes.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include 
"llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/VirtRegMap.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Function.h" #include "llvm/IR/Metadata.h" -#include "llvm/IR/Value.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> +#include <cassert> +#include <iterator> #include <memory> #include <utility> @@ -52,6 +70,7 @@ EnableLDV("live-debug-variables", cl::init(true), cl::desc("Enable the live debug variables pass"), cl::Hidden); STATISTIC(NumInsertedDebugValues, "Number of DBG_VALUEs inserted"); + char LiveDebugVariables::ID = 0; INITIALIZE_PASS_BEGIN(LiveDebugVariables, DEBUG_TYPE, @@ -68,12 +87,56 @@ void LiveDebugVariables::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } -LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID), pImpl(nullptr) { +LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID) { initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); } +enum : unsigned { UndefLocNo = ~0U }; + +/// Describes a location by number along with some flags about the original +/// usage of the location. 
+class DbgValueLocation { +public: + DbgValueLocation(unsigned LocNo, bool WasIndirect) + : LocNo(LocNo), WasIndirect(WasIndirect) { + static_assert(sizeof(*this) == sizeof(unsigned), "bad bitfield packing"); + assert(locNo() == LocNo && "location truncation"); + } + + DbgValueLocation() : LocNo(0), WasIndirect(0) {} + + unsigned locNo() const { + // Fix up the undef location number, which gets truncated. + return LocNo == INT_MAX ? UndefLocNo : LocNo; + } + bool wasIndirect() const { return WasIndirect; } + bool isUndef() const { return locNo() == UndefLocNo; } + + DbgValueLocation changeLocNo(unsigned NewLocNo) const { + return DbgValueLocation(NewLocNo, WasIndirect); + } + + friend inline bool operator==(const DbgValueLocation &LHS, + const DbgValueLocation &RHS) { + return LHS.LocNo == RHS.LocNo && LHS.WasIndirect == RHS.WasIndirect; + } + + friend inline bool operator!=(const DbgValueLocation &LHS, + const DbgValueLocation &RHS) { + return !(LHS == RHS); + } + +private: + unsigned LocNo : 31; + unsigned WasIndirect : 1; +}; + /// LocMap - Map of where a user value is live, and its location. -typedef IntervalMap<SlotIndex, unsigned, 4> LocMap; +using LocMap = IntervalMap<SlotIndex, DbgValueLocation, 4>; + +namespace { + +class LDVImpl; /// UserValue - A user value is a part of a debug info user variable. /// @@ -84,17 +147,13 @@ typedef IntervalMap<SlotIndex, unsigned, 4> LocMap; /// user values are related if they refer to the same variable, or if they are /// held by the same virtual register. The equivalence class is the transitive /// closure of that relation. -namespace { -class LDVImpl; class UserValue { - const MDNode *Variable; ///< The debug info variable we are part of. - const MDNode *Expression; ///< Any complex address expression. - unsigned offset; ///< Byte offset into variable. - bool IsIndirect; ///< true if this is a register-indirect+offset value. + const DILocalVariable *Variable; ///< The debug info variable we are part of. 
+ const DIExpression *Expression; ///< Any complex address expression. DebugLoc dl; ///< The debug location for the variable. This is ///< used by dwarf writer to find lexical scope. UserValue *leader; ///< Equivalence class leader. - UserValue *next; ///< Next value in equivalence class, or null. + UserValue *next = nullptr; ///< Next value in equivalence class, or null. /// Numbered locations referenced by locmap. SmallVector<MachineOperand, 4> locations; @@ -102,14 +161,16 @@ class UserValue { /// Map of slot indices where this value is live. LocMap locInts; - /// coalesceLocation - After LocNo was changed, check if it has become - /// identical to another location, and coalesce them. This may cause LocNo or - /// a later location to be erased, but no earlier location will be erased. - void coalesceLocation(unsigned LocNo); + /// Set of interval start indexes that have been trimmed to the + /// lexical scope. + SmallSet<SlotIndex, 2> trimmedDefs; /// insertDebugValue - Insert a DBG_VALUE into MBB at Idx for LocNo. - void insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, unsigned LocNo, - LiveIntervals &LIS, const TargetInstrInfo &TII); + void insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx, + SlotIndex StopIdx, + DbgValueLocation Loc, bool Spilled, LiveIntervals &LIS, + const TargetInstrInfo &TII, + const TargetRegisterInfo &TRI); /// splitLocation - Replace OldLocNo ranges with NewRegs ranges where NewRegs /// is live. Returns true if any changes were made. @@ -118,10 +179,10 @@ class UserValue { public: /// UserValue - Create a new UserValue. 
- UserValue(const MDNode *var, const MDNode *expr, unsigned o, bool i, - DebugLoc L, LocMap::Allocator &alloc) - : Variable(var), Expression(expr), offset(o), IsIndirect(i), - dl(std::move(L)), leader(this), next(nullptr), locInts(alloc) {} + UserValue(const DILocalVariable *var, const DIExpression *expr, DebugLoc L, + LocMap::Allocator &alloc) + : Variable(var), Expression(expr), dl(std::move(L)), leader(this), + locInts(alloc) {} /// getLeader - Get the leader of this value's equivalence class. UserValue *getLeader() { @@ -135,10 +196,11 @@ public: UserValue *getNext() const { return next; } /// match - Does this UserValue match the parameters? - bool match(const MDNode *Var, const MDNode *Expr, const DILocation *IA, - unsigned Offset, bool indirect) const { - return Var == Variable && Expr == Expression && dl->getInlinedAt() == IA && - Offset == offset && indirect == IsIndirect; + bool match(const DILocalVariable *Var, const DIExpression *Expr, + const DILocation *IA) const { + // FIXME: The fragment should be part of the equivalence class, but not + // other things in the expression like stack values. + return Var == Variable && Expr == Expression && dl->getInlinedAt() == IA; } /// merge - Merge equivalence classes. @@ -165,7 +227,7 @@ public: unsigned getLocationNo(const MachineOperand &LocMO) { if (LocMO.isReg()) { if (LocMO.getReg() == 0) - return ~0u; + return UndefLocNo; // For register locations we dont care about use/def and other flags. for (unsigned i = 0, e = locations.size(); i != e; ++i) if (locations[i].isReg() && @@ -189,14 +251,15 @@ public: void mapVirtRegs(LDVImpl *LDV); /// addDef - Add a definition point to this value. - void addDef(SlotIndex Idx, const MachineOperand &LocMO) { + void addDef(SlotIndex Idx, const MachineOperand &LocMO, bool IsIndirect) { + DbgValueLocation Loc(getLocationNo(LocMO), IsIndirect); // Add a singular (Idx,Idx) -> Loc mapping. 
LocMap::iterator I = locInts.find(Idx); if (!I.valid() || I.start() != Idx) - I.insert(Idx, Idx.getNextSlot(), getLocationNo(LocMO)); + I.insert(Idx, Idx.getNextSlot(), Loc); else // A later DBG_VALUE at the same SlotIndex overrides the old location. - I.setValue(getLocationNo(LocMO)); + I.setValue(Loc); } /// extendDef - Extend the current definition as far as possible down. @@ -204,12 +267,12 @@ public: /// range of VNI. /// End points where VNI is no longer live are added to Kills. /// @param Idx Starting point for the definition. - /// @param LocNo Location number to propagate. + /// @param Loc Location number to propagate. /// @param LR Restrict liveness to where LR has the value VNI. May be null. /// @param VNI When LR is not null, this is the value to restrict to. /// @param Kills Append end points of VNI's live range to Kills. /// @param LIS Live intervals analysis. - void extendDef(SlotIndex Idx, unsigned LocNo, + void extendDef(SlotIndex Idx, DbgValueLocation Loc, LiveRange *LR, const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills, LiveIntervals &LIS); @@ -219,18 +282,19 @@ public: /// points, and add defs if possible. /// @param LI Scan for copies of the value in LI->reg. /// @param LocNo Location number of LI->reg. + /// @param WasIndirect Indicates if the original use of LI->reg was indirect /// @param Kills Points where the range of LocNo could be extended. /// @param NewDefs Append (Idx, LocNo) of inserted defs here. 
- void addDefsFromCopies(LiveInterval *LI, unsigned LocNo, - const SmallVectorImpl<SlotIndex> &Kills, - SmallVectorImpl<std::pair<SlotIndex, unsigned> > &NewDefs, - MachineRegisterInfo &MRI, - LiveIntervals &LIS); + void addDefsFromCopies( + LiveInterval *LI, unsigned LocNo, bool WasIndirect, + const SmallVectorImpl<SlotIndex> &Kills, + SmallVectorImpl<std::pair<SlotIndex, DbgValueLocation>> &NewDefs, + MachineRegisterInfo &MRI, LiveIntervals &LIS); /// computeIntervals - Compute the live intervals of all locations after /// collecting all their def points. void computeIntervals(MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, - LiveIntervals &LIS); + LiveIntervals &LIS, LexicalScopes &LS); /// splitRegister - Replace OldReg ranges with NewRegs ranges where NewRegs is /// live. Returns true if any changes were made. @@ -238,47 +302,50 @@ public: LiveIntervals &LIS); /// rewriteLocations - Rewrite virtual register locations according to the - /// provided virtual register map. - void rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI); + /// provided virtual register map. Record which locations were spilled. + void rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI, + BitVector &SpilledLocations); /// emitDebugValues - Recreate DBG_VALUE instruction from data structures. - void emitDebugValues(VirtRegMap *VRM, - LiveIntervals &LIS, const TargetInstrInfo &TRI); + void emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, + const TargetInstrInfo &TII, + const TargetRegisterInfo &TRI, + const BitVector &SpilledLocations); /// getDebugLoc - Return DebugLoc of this UserValue. DebugLoc getDebugLoc() { return dl;} + void print(raw_ostream &, const TargetRegisterInfo *); }; -} // namespace /// LDVImpl - Implementation of the LiveDebugVariables pass. 
-namespace { class LDVImpl { LiveDebugVariables &pass; LocMap::Allocator allocator; - MachineFunction *MF; + MachineFunction *MF = nullptr; LiveIntervals *LIS; const TargetRegisterInfo *TRI; /// Whether emitDebugValues is called. - bool EmitDone; + bool EmitDone = false; + /// Whether the machine function is modified during the pass. - bool ModifiedMF; + bool ModifiedMF = false; /// userValues - All allocated UserValue instances. SmallVector<std::unique_ptr<UserValue>, 8> userValues; /// Map virtual register to eq class leader. - typedef DenseMap<unsigned, UserValue*> VRMap; + using VRMap = DenseMap<unsigned, UserValue *>; VRMap virtRegToEqClass; /// Map user variable to eq class leader. - typedef DenseMap<const MDNode *, UserValue*> UVMap; + using UVMap = DenseMap<const DILocalVariable *, UserValue *>; UVMap userVarMap; /// getUserValue - Find or create a UserValue. - UserValue *getUserValue(const MDNode *Var, const MDNode *Expr, - unsigned Offset, bool IsIndirect, const DebugLoc &DL); + UserValue *getUserValue(const DILocalVariable *Var, const DIExpression *Expr, + const DebugLoc &DL); /// lookupVirtReg - Find the EC leader for VirtReg or null. UserValue *lookupVirtReg(unsigned VirtReg); @@ -300,8 +367,8 @@ class LDVImpl { void computeIntervals(); public: - LDVImpl(LiveDebugVariables *ps) - : pass(*ps), MF(nullptr), EmitDone(false), ModifiedMF(false) {} + LDVImpl(LiveDebugVariables *ps) : pass(*ps) {} + bool runOnMachineFunction(MachineFunction &mf); /// clear - Release all memory. 
@@ -328,8 +395,10 @@ public: void print(raw_ostream&); }; -} // namespace +} // end anonymous namespace + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) static void printDebugLoc(const DebugLoc &DL, raw_ostream &CommentOS, const LLVMContext &Ctx) { if (!DL) @@ -372,14 +441,15 @@ void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) { printExtendedName(OS, DV, dl); OS << "\"\t"; - if (offset) - OS << '+' << offset; for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) { OS << " [" << I.start() << ';' << I.stop() << "):"; - if (I.value() == ~0u) + if (I.value().isUndef()) OS << "undef"; - else - OS << I.value(); + else { + OS << I.value().locNo(); + if (I.value().wasIndirect()) + OS << " ind"; + } } for (unsigned i = 0, e = locations.size(); i != e; ++i) { OS << " Loc" << i << '='; @@ -393,34 +463,7 @@ void LDVImpl::print(raw_ostream &OS) { for (unsigned i = 0, e = userValues.size(); i != e; ++i) userValues[i]->print(OS, TRI); } - -void UserValue::coalesceLocation(unsigned LocNo) { - unsigned KeepLoc = 0; - for (unsigned e = locations.size(); KeepLoc != e; ++KeepLoc) { - if (KeepLoc == LocNo) - continue; - if (locations[KeepLoc].isIdenticalTo(locations[LocNo])) - break; - } - // No matches. - if (KeepLoc == locations.size()) - return; - - // Keep the smaller location, erase the larger one. - unsigned EraseLoc = LocNo; - if (KeepLoc > EraseLoc) - std::swap(KeepLoc, EraseLoc); - locations.erase(locations.begin() + EraseLoc); - - // Rewrite values. - for (LocMap::iterator I = locInts.begin(); I.valid(); ++I) { - unsigned v = I.value(); - if (v == EraseLoc) - I.setValue(KeepLoc); // Coalesce when possible. - else if (v > EraseLoc) - I.setValueUnchecked(v-1); // Avoid coalescing with untransformed values. 
- } -} +#endif void UserValue::mapVirtRegs(LDVImpl *LDV) { for (unsigned i = 0, e = locations.size(); i != e; ++i) @@ -429,20 +472,19 @@ void UserValue::mapVirtRegs(LDVImpl *LDV) { LDV->mapVirtReg(locations[i].getReg(), this); } -UserValue *LDVImpl::getUserValue(const MDNode *Var, const MDNode *Expr, - unsigned Offset, bool IsIndirect, - const DebugLoc &DL) { +UserValue *LDVImpl::getUserValue(const DILocalVariable *Var, + const DIExpression *Expr, const DebugLoc &DL) { UserValue *&Leader = userVarMap[Var]; if (Leader) { UserValue *UV = Leader->getLeader(); Leader = UV; for (; UV; UV = UV->getNext()) - if (UV->match(Var, Expr, DL->getInlinedAt(), Offset, IsIndirect)) + if (UV->match(Var, Expr, DL->getInlinedAt())) return UV; } userValues.push_back( - make_unique<UserValue>(Var, Expr, Offset, IsIndirect, DL, allocator)); + llvm::make_unique<UserValue>(Var, Expr, DL, allocator)); UserValue *UV = userValues.back().get(); Leader = UserValue::merge(Leader, UV); return UV; @@ -469,14 +511,15 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) { return false; } - // Get or create the UserValue for (variable,offset). - bool IsIndirect = MI.isIndirectDebugValue(); - unsigned Offset = IsIndirect ? MI.getOperand(1).getImm() : 0; - const MDNode *Var = MI.getDebugVariable(); - const MDNode *Expr = MI.getDebugExpression(); - //here. - UserValue *UV = getUserValue(Var, Expr, Offset, IsIndirect, MI.getDebugLoc()); - UV->addDef(Idx, MI.getOperand(0)); + // Get or create the UserValue for (variable,offset) here. 
+ bool IsIndirect = MI.getOperand(1).isImm(); + if (IsIndirect) + assert(MI.getOperand(1).getImm() == 0 && "DBG_VALUE with nonzero offset"); + const DILocalVariable *Var = MI.getDebugVariable(); + const DIExpression *Expr = MI.getDebugExpression(); + UserValue *UV = + getUserValue(Var, Expr, MI.getDebugLoc()); + UV->addDef(Idx, MI.getOperand(0), IsIndirect); return true; } @@ -511,7 +554,7 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) { /// We only propagate DBG_VALUES locally here. LiveDebugValues performs a /// data-flow analysis to propagate them beyond basic block boundaries. -void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, LiveRange *LR, +void UserValue::extendDef(SlotIndex Idx, DbgValueLocation Loc, LiveRange *LR, const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills, LiveIntervals &LIS) { SlotIndex Start = Idx; @@ -538,7 +581,7 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, LiveRange *LR, if (I.valid() && I.start() <= Start) { // Stop when meeting a different location or an already extended interval. Start = Start.getNextSlot(); - if (I.value() != LocNo || I.stop() != Start) + if (I.value() != Loc || I.stop() != Start) return; // This is a one-slot placeholder. Just skip it. 
++I; @@ -554,14 +597,14 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, LiveRange *LR, Kills->push_back(Stop); if (Start < Stop) - I.insert(Start, Stop, LocNo); + I.insert(Start, Stop, Loc); } -void -UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo, - const SmallVectorImpl<SlotIndex> &Kills, - SmallVectorImpl<std::pair<SlotIndex, unsigned> > &NewDefs, - MachineRegisterInfo &MRI, LiveIntervals &LIS) { +void UserValue::addDefsFromCopies( + LiveInterval *LI, unsigned LocNo, bool WasIndirect, + const SmallVectorImpl<SlotIndex> &Kills, + SmallVectorImpl<std::pair<SlotIndex, DbgValueLocation>> &NewDefs, + MachineRegisterInfo &MRI, LiveIntervals &LIS) { if (Kills.empty()) return; // Don't track copies from physregs, there are too many uses. @@ -588,7 +631,7 @@ UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo, // it, or we are looking at a wrong value of LI. SlotIndex Idx = LIS.getInstructionIndex(*MI); LocMap::iterator I = locInts.find(Idx.getRegSlot(true)); - if (!I.valid() || I.value() != LocNo) + if (!I.valid() || I.value().locNo() != LocNo) continue; if (!LIS.hasInterval(DstReg)) @@ -621,69 +664,141 @@ UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo, MachineInstr *CopyMI = LIS.getInstructionFromIndex(DstVNI->def); assert(CopyMI && CopyMI->isCopy() && "Bad copy value"); unsigned LocNo = getLocationNo(CopyMI->getOperand(0)); - I.insert(Idx, Idx.getNextSlot(), LocNo); - NewDefs.push_back(std::make_pair(Idx, LocNo)); + DbgValueLocation NewLoc(LocNo, WasIndirect); + I.insert(Idx, Idx.getNextSlot(), NewLoc); + NewDefs.push_back(std::make_pair(Idx, NewLoc)); break; } } } -void -UserValue::computeIntervals(MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, - LiveIntervals &LIS) { - SmallVector<std::pair<SlotIndex, unsigned>, 16> Defs; +void UserValue::computeIntervals(MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + LiveIntervals &LIS, LexicalScopes &LS) { + SmallVector<std::pair<SlotIndex, 
DbgValueLocation>, 16> Defs; // Collect all defs to be extended (Skipping undefs). for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) - if (I.value() != ~0u) + if (!I.value().isUndef()) Defs.push_back(std::make_pair(I.start(), I.value())); // Extend all defs, and possibly add new ones along the way. for (unsigned i = 0; i != Defs.size(); ++i) { SlotIndex Idx = Defs[i].first; - unsigned LocNo = Defs[i].second; - const MachineOperand &Loc = locations[LocNo]; + DbgValueLocation Loc = Defs[i].second; + const MachineOperand &LocMO = locations[Loc.locNo()]; - if (!Loc.isReg()) { - extendDef(Idx, LocNo, nullptr, nullptr, nullptr, LIS); + if (!LocMO.isReg()) { + extendDef(Idx, Loc, nullptr, nullptr, nullptr, LIS); continue; } // Register locations are constrained to where the register value is live. - if (TargetRegisterInfo::isVirtualRegister(Loc.getReg())) { + if (TargetRegisterInfo::isVirtualRegister(LocMO.getReg())) { LiveInterval *LI = nullptr; const VNInfo *VNI = nullptr; - if (LIS.hasInterval(Loc.getReg())) { - LI = &LIS.getInterval(Loc.getReg()); + if (LIS.hasInterval(LocMO.getReg())) { + LI = &LIS.getInterval(LocMO.getReg()); VNI = LI->getVNInfoAt(Idx); } SmallVector<SlotIndex, 16> Kills; - extendDef(Idx, LocNo, LI, VNI, &Kills, LIS); + extendDef(Idx, Loc, LI, VNI, &Kills, LIS); if (LI) - addDefsFromCopies(LI, LocNo, Kills, Defs, MRI, LIS); + addDefsFromCopies(LI, Loc.locNo(), Loc.wasIndirect(), Kills, Defs, MRI, + LIS); continue; } - // For physregs, use the live range of the first regunit as a guide. - unsigned Unit = *MCRegUnitIterator(Loc.getReg(), &TRI); - LiveRange *LR = &LIS.getRegUnit(Unit); - const VNInfo *VNI = LR->getVNInfoAt(Idx); - // Don't track copies from physregs, it is too expensive. - extendDef(Idx, LocNo, LR, VNI, nullptr, LIS); + // For physregs, we only mark the start slot idx. DwarfDebug will see it + // as if the DBG_VALUE is valid up until the end of the basic block, or + // the next def of the physical register. 
So we do not need to extend the + // range. It might actually happen that the DBG_VALUE is the last use of + // the physical register (e.g. if this is an unused input argument to a + // function). } - // Finally, erase all the undefs. + // Erase all the undefs. for (LocMap::iterator I = locInts.begin(); I.valid();) - if (I.value() == ~0u) + if (I.value().isUndef()) I.erase(); else ++I; + + // The computed intervals may extend beyond the range of the debug + // location's lexical scope. In this case, splitting of an interval + // can result in an interval outside of the scope being created, + // causing extra unnecessary DBG_VALUEs to be emitted. To prevent + // this, trim the intervals to the lexical scope. + + LexicalScope *Scope = LS.findLexicalScope(dl); + if (!Scope) + return; + + SlotIndex PrevEnd; + LocMap::iterator I = locInts.begin(); + + // Iterate over the lexical scope ranges. Each time round the loop + // we check the intervals for overlap with the end of the previous + // range and the start of the next. The first range is handled as + // a special case where there is no PrevEnd. + for (const InsnRange &Range : Scope->getRanges()) { + SlotIndex RStart = LIS.getInstructionIndex(*Range.first); + SlotIndex REnd = LIS.getInstructionIndex(*Range.second); + + // At the start of each iteration I has been advanced so that + // I.stop() >= PrevEnd. Check for overlap. + if (PrevEnd && I.start() < PrevEnd) { + SlotIndex IStop = I.stop(); + DbgValueLocation Loc = I.value(); + + // Stop overlaps previous end - trim the end of the interval to + // the scope range. + I.setStopUnchecked(PrevEnd); + ++I; + + // If the interval also overlaps the start of the "next" (i.e. + // current) range create a new interval for the remainder (which + // may be further trimmed). + if (RStart < IStop) + I.insert(RStart, IStop, Loc); + } + + // Advance I so that I.stop() >= RStart, and check for overlap. 
+ I.advanceTo(RStart); + if (!I.valid()) + return; + + if (I.start() < RStart) { + // Interval start overlaps range - trim to the scope range. + I.setStartUnchecked(RStart); + // Remember that this interval was trimmed. + trimmedDefs.insert(RStart); + } + + // The end of a lexical scope range is the last instruction in the + // range. To convert to an interval we need the index of the + // instruction after it. + REnd = REnd.getNextIndex(); + + // Advance I to first interval outside current range. + I.advanceTo(REnd); + if (!I.valid()) + return; + + PrevEnd = REnd; + } + + // Check for overlap with end of final range. + if (PrevEnd && I.start() < PrevEnd) + I.setStopUnchecked(PrevEnd); } void LDVImpl::computeIntervals() { + LexicalScopes LS; + LS.initialize(*MF); + for (unsigned i = 0, e = userValues.size(); i != e; ++i) { - userValues[i]->computeIntervals(MF->getRegInfo(), *TRI, *LIS); + userValues[i]->computeIntervals(MF->getRegInfo(), *TRI, *LIS, LS); userValues[i]->mapVirtRegs(this); } } @@ -718,7 +833,7 @@ static void removeDebugValues(MachineFunction &mf) { bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) { if (!EnableLDV) return false; - if (!mf.getFunction()->getSubprogram()) { + if (!mf.getFunction().getSubprogram()) { removeDebugValues(mf); return false; } @@ -757,7 +872,7 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs, continue; // Don't allocate the new LocNo until it is needed. - unsigned NewLocNo = ~0u; + unsigned NewLocNo = UndefLocNo; // Iterate over the overlaps between locInts and LI. LocMapI.find(LI->beginIndex()); @@ -772,9 +887,9 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs, break; // Now LII->end > LocMapI.start(). Do we have an overlap? - if (LocMapI.value() == OldLocNo && LII->start < LocMapI.stop()) { + if (LocMapI.value().locNo() == OldLocNo && LII->start < LocMapI.stop()) { // Overlapping correct location. Allocate NewLocNo now. 
- if (NewLocNo == ~0u) { + if (NewLocNo == UndefLocNo) { MachineOperand MO = MachineOperand::CreateReg(LI->reg, false); MO.setSubReg(locations[OldLocNo].getSubReg()); NewLocNo = getLocationNo(MO); @@ -783,6 +898,7 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs, SlotIndex LStart = LocMapI.start(); SlotIndex LStop = LocMapI.stop(); + DbgValueLocation OldLoc = LocMapI.value(); // Trim LocMapI down to the LII overlap. if (LStart < LII->start) @@ -791,17 +907,17 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs, LocMapI.setStopUnchecked(LII->end); // Change the value in the overlap. This may trigger coalescing. - LocMapI.setValue(NewLocNo); + LocMapI.setValue(OldLoc.changeLocNo(NewLocNo)); // Re-insert any removed OldLocNo ranges. if (LStart < LocMapI.start()) { - LocMapI.insert(LStart, LocMapI.start(), OldLocNo); + LocMapI.insert(LStart, LocMapI.start(), OldLoc); ++LocMapI; assert(LocMapI.valid() && "Unexpected coalescing"); } if (LStop > LocMapI.stop()) { ++LocMapI; - LocMapI.insert(LII->end, LStop, OldLocNo); + LocMapI.insert(LII->end, LStop, OldLoc); --LocMapI; } } @@ -824,14 +940,14 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs, locations.erase(locations.begin() + OldLocNo); LocMapI.goToBegin(); while (LocMapI.valid()) { - unsigned v = LocMapI.value(); - if (v == OldLocNo) { + DbgValueLocation v = LocMapI.value(); + if (v.locNo() == OldLocNo) { DEBUG(dbgs() << "Erasing [" << LocMapI.start() << ';' << LocMapI.stop() << ")\n"); LocMapI.erase(); } else { - if (v > OldLocNo) - LocMapI.setValueUnchecked(v-1); + if (v.locNo() > OldLocNo) + LocMapI.setValueUnchecked(v.changeLocNo(v.locNo() - 1)); ++LocMapI; } } @@ -876,36 +992,73 @@ splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs, LiveIntervals &LIS) { static_cast<LDVImpl*>(pImpl)->splitRegister(OldReg, NewRegs); } -void -UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI) { - // Iterate over locations in 
reverse makes it easier to handle coalescing. - for (unsigned i = locations.size(); i ; --i) { - unsigned LocNo = i-1; - MachineOperand &Loc = locations[LocNo]; +void UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI, + BitVector &SpilledLocations) { + // Build a set of new locations with new numbers so we can coalesce our + // IntervalMap if two vreg intervals collapse to the same physical location. + // Use MapVector instead of SetVector because MapVector::insert returns the + // position of the previously or newly inserted element. The boolean value + // tracks if the location was produced by a spill. + // FIXME: This will be problematic if we ever support direct and indirect + // frame index locations, i.e. expressing both variables in memory and + // 'int x, *px = &x'. The "spilled" bit must become part of the location. + MapVector<MachineOperand, bool> NewLocations; + SmallVector<unsigned, 4> LocNoMap(locations.size()); + for (unsigned I = 0, E = locations.size(); I != E; ++I) { + bool Spilled = false; + MachineOperand Loc = locations[I]; // Only virtual registers are rewritten. - if (!Loc.isReg() || !Loc.getReg() || - !TargetRegisterInfo::isVirtualRegister(Loc.getReg())) - continue; - unsigned VirtReg = Loc.getReg(); - if (VRM.isAssignedReg(VirtReg) && - TargetRegisterInfo::isPhysicalRegister(VRM.getPhys(VirtReg))) { - // This can create a %noreg operand in rare cases when the sub-register - // index is no longer available. That means the user value is in a - // non-existent sub-register, and %noreg is exactly what we want. - Loc.substPhysReg(VRM.getPhys(VirtReg), TRI); - } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT) { - // FIXME: Translate SubIdx to a stackslot offset. 
- Loc = MachineOperand::CreateFI(VRM.getStackSlot(VirtReg)); - } else { - Loc.setReg(0); - Loc.setSubReg(0); + if (Loc.isReg() && Loc.getReg() && + TargetRegisterInfo::isVirtualRegister(Loc.getReg())) { + unsigned VirtReg = Loc.getReg(); + if (VRM.isAssignedReg(VirtReg) && + TargetRegisterInfo::isPhysicalRegister(VRM.getPhys(VirtReg))) { + // This can create a %noreg operand in rare cases when the sub-register + // index is no longer available. That means the user value is in a + // non-existent sub-register, and %noreg is exactly what we want. + Loc.substPhysReg(VRM.getPhys(VirtReg), TRI); + } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT) { + // FIXME: Translate SubIdx to a stackslot offset. + Loc = MachineOperand::CreateFI(VRM.getStackSlot(VirtReg)); + Spilled = true; + } else { + Loc.setReg(0); + Loc.setSubReg(0); + } + } + + // Insert this location if it doesn't already exist and record a mapping + // from the old number to the new number. + auto InsertResult = NewLocations.insert({Loc, Spilled}); + unsigned NewLocNo = std::distance(NewLocations.begin(), InsertResult.first); + LocNoMap[I] = NewLocNo; + } + + // Rewrite the locations and record which ones were spill slots. + locations.clear(); + SpilledLocations.clear(); + SpilledLocations.resize(NewLocations.size()); + for (auto &Pair : NewLocations) { + locations.push_back(Pair.first); + if (Pair.second) { + unsigned NewLocNo = std::distance(&*NewLocations.begin(), &Pair); + SpilledLocations.set(NewLocNo); } - coalesceLocation(LocNo); + } + + // Update the interval map, but only coalesce left, since intervals to the + // right use the old location numbers. This should merge two contiguous + // DBG_VALUE intervals with different vregs that were allocated to the same + // physical register. 
+ for (LocMap::iterator I = locInts.begin(); I.valid(); ++I) { + DbgValueLocation Loc = I.value(); + unsigned NewLocNo = LocNoMap[Loc.locNo()]; + I.setValueUnchecked(Loc.changeLocNo(NewLocNo)); + I.setStart(I.start()); } } -/// findInsertLocation - Find an iterator for inserting a DBG_VALUE -/// instruction. +/// Find an iterator for inserting a DBG_VALUE instruction. static MachineBasicBlock::iterator findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx, LiveIntervals &LIS) { @@ -928,52 +1081,111 @@ findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx, std::next(MachineBasicBlock::iterator(MI)); } -void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, - unsigned LocNo, +/// Find an iterator for inserting the next DBG_VALUE instruction +/// (or end if no more insert locations found). +static MachineBasicBlock::iterator +findNextInsertLocation(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + SlotIndex StopIdx, MachineOperand &LocMO, + LiveIntervals &LIS, + const TargetRegisterInfo &TRI) { + if (!LocMO.isReg()) + return MBB->instr_end(); + unsigned Reg = LocMO.getReg(); + + // Find the next instruction in the MBB that define the register Reg. + while (I != MBB->end()) { + if (!LIS.isNotInMIMap(*I) && + SlotIndex::isEarlierEqualInstr(StopIdx, LIS.getInstructionIndex(*I))) + break; + if (I->definesRegister(Reg, &TRI)) + // The insert location is directly after the instruction/bundle. + return std::next(I); + ++I; + } + return MBB->end(); +} + +void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx, + SlotIndex StopIdx, + DbgValueLocation Loc, bool Spilled, LiveIntervals &LIS, - const TargetInstrInfo &TII) { - MachineBasicBlock::iterator I = findInsertLocation(MBB, Idx, LIS); - MachineOperand &Loc = locations[LocNo]; + const TargetInstrInfo &TII, + const TargetRegisterInfo &TRI) { + SlotIndex MBBEndIdx = LIS.getMBBEndIdx(&*MBB); + // Only search within the current MBB. + StopIdx = (MBBEndIdx < StopIdx) ? 
MBBEndIdx : StopIdx; + MachineBasicBlock::iterator I = findInsertLocation(MBB, StartIdx, LIS); + MachineOperand &MO = locations[Loc.locNo()]; ++NumInsertedDebugValues; assert(cast<DILocalVariable>(Variable) ->isValidLocationForIntrinsic(getDebugLoc()) && "Expected inlined-at fields to agree"); - if (Loc.isReg()) - BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE), - IsIndirect, Loc.getReg(), offset, Variable, Expression); - else - BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)) - .add(Loc) - .addImm(offset) - .addMetadata(Variable) - .addMetadata(Expression); + + // If the location was spilled, the new DBG_VALUE will be indirect. If the + // original DBG_VALUE was indirect, we need to add DW_OP_deref to indicate + // that the original virtual register was a pointer. + const DIExpression *Expr = Expression; + bool IsIndirect = Loc.wasIndirect(); + if (Spilled) { + if (IsIndirect) + Expr = DIExpression::prepend(Expr, DIExpression::WithDeref); + IsIndirect = true; + } + + assert((!Spilled || MO.isFI()) && "a spilled location must be a frame index"); + + do { + MachineInstrBuilder MIB = + BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)) + .add(MO); + if (IsIndirect) + MIB.addImm(0U); + else + MIB.addReg(0U, RegState::Debug); + MIB.addMetadata(Variable).addMetadata(Expr); + + // Continue and insert DBG_VALUES after every redefinition of register + // associated with the debug value within the range + I = findNextInsertLocation(MBB, I, StopIdx, MO, LIS, TRI); + } while (I != MBB->end()); } void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, - const TargetInstrInfo &TII) { + const TargetInstrInfo &TII, + const TargetRegisterInfo &TRI, + const BitVector &SpilledLocations) { MachineFunction::iterator MFEnd = VRM->getMachineFunction().end(); for (LocMap::const_iterator I = locInts.begin(); I.valid();) { SlotIndex Start = I.start(); SlotIndex Stop = I.stop(); - unsigned LocNo = I.value(); - DEBUG(dbgs() << 
"\t[" << Start << ';' << Stop << "):" << LocNo); + DbgValueLocation Loc = I.value(); + bool Spilled = !Loc.isUndef() ? SpilledLocations.test(Loc.locNo()) : false; + + // If the interval start was trimmed to the lexical scope insert the + // DBG_VALUE at the previous index (otherwise it appears after the + // first instruction in the range). + if (trimmedDefs.count(Start)) + Start = Start.getPrevIndex(); + + DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << Loc.locNo()); MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start)->getIterator(); SlotIndex MBBEnd = LIS.getMBBEndIdx(&*MBB); - DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd); - insertDebugValue(&*MBB, Start, LocNo, LIS, TII); + DEBUG(dbgs() << ' ' << printMBBReference(*MBB) << '-' << MBBEnd); + insertDebugValue(&*MBB, Start, Stop, Loc, Spilled, LIS, TII, TRI); // This interval may span multiple basic blocks. // Insert a DBG_VALUE into each one. - while(Stop > MBBEnd) { + while (Stop > MBBEnd) { // Move to the next block. 
Start = MBBEnd; if (++MBB == MFEnd) break; MBBEnd = LIS.getMBBEndIdx(&*MBB); - DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd); - insertDebugValue(&*MBB, Start, LocNo, LIS, TII); + DEBUG(dbgs() << ' ' << printMBBReference(*MBB) << '-' << MBBEnd); + insertDebugValue(&*MBB, Start, Stop, Loc, Spilled, LIS, TII, TRI); } DEBUG(dbgs() << '\n'); if (MBB == MFEnd) @@ -988,10 +1200,11 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) { if (!MF) return; const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + BitVector SpilledLocations; for (unsigned i = 0, e = userValues.size(); i != e; ++i) { DEBUG(userValues[i]->print(dbgs(), TRI)); - userValues[i]->rewriteLocations(*VRM, *TRI); - userValues[i]->emitDebugValues(VRM, *LIS, *TII); + userValues[i]->rewriteLocations(*VRM, *TRI, SpilledLocations); + userValues[i]->emitDebugValues(VRM, *LIS, *TII, *TRI, SpilledLocations); } EmitDone = true; } diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h index 1d7e3d4371a2..aa35880b063a 100644 --- a/lib/CodeGen/LiveDebugVariables.h +++ b/lib/CodeGen/LiveDebugVariables.h @@ -1,4 +1,4 @@ -//===- LiveDebugVariables.h - Tracking debug info variables ----*- c++ -*--===// +//===- LiveDebugVariables.h - Tracking debug info variables -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -22,17 +22,16 @@ #define LLVM_LIB_CODEGEN_LIVEDEBUGVARIABLES_H #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/Support/Compiler.h" namespace llvm { template <typename T> class ArrayRef; -class LiveInterval; class LiveIntervals; class VirtRegMap; class LLVM_LIBRARY_VISIBILITY LiveDebugVariables : public MachineFunctionPass { - void *pImpl; + void *pImpl = nullptr; public: static char ID; // Pass identification, replacement for typeid @@ -62,14 +61,12 @@ public: void dump() const; private: - bool runOnMachineFunction(MachineFunction &) override; void releaseMemory() override; void 
getAnalysisUsage(AnalysisUsage &) const override; bool doInitialization(Module &) override; - }; -} // namespace llvm +} // end namespace llvm -#endif +#endif // LLVM_LIB_CODEGEN_LIVEDEBUGVARIABLES_H diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 9ef9f238fdce..302c75133e35 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -1,4 +1,4 @@ -//===-- LiveInterval.cpp - Live Interval Representation -------------------===// +//===- LiveInterval.cpp - Live Interval Representation --------------------===// // // The LLVM Compiler Infrastructure // @@ -19,20 +19,34 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/LiveInterval.h" - #include "LiveRangeUtils.h" #include "RegisterCoalescer.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/MC/LaneBitmask.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetRegisterInfo.h" #include <algorithm> +#include <cassert> +#include <cstddef> +#include <iterator> +#include <utility> + using namespace llvm; namespace { + //===----------------------------------------------------------------------===// // Implementation of various methods necessary for calculation of live ranges. 
// The implementation of the methods abstracts from the concrete type of the @@ -56,8 +70,8 @@ protected: CalcLiveRangeUtilBase(LiveRange *LR) : LR(LR) {} public: - typedef LiveRange::Segment Segment; - typedef IteratorT iterator; + using Segment = LiveRange::Segment; + using iterator = IteratorT; /// A counterpart of LiveRange::createDeadDef: Make sure the range has a /// value defined at @p Def. @@ -265,8 +279,9 @@ private: //===----------------------------------------------------------------------===// class CalcLiveRangeUtilVector; -typedef CalcLiveRangeUtilBase<CalcLiveRangeUtilVector, LiveRange::iterator, - LiveRange::Segments> CalcLiveRangeUtilVectorBase; +using CalcLiveRangeUtilVectorBase = + CalcLiveRangeUtilBase<CalcLiveRangeUtilVector, LiveRange::iterator, + LiveRange::Segments>; class CalcLiveRangeUtilVector : public CalcLiveRangeUtilVectorBase { public: @@ -292,9 +307,9 @@ private: //===----------------------------------------------------------------------===// class CalcLiveRangeUtilSet; -typedef CalcLiveRangeUtilBase<CalcLiveRangeUtilSet, - LiveRange::SegmentSet::iterator, - LiveRange::SegmentSet> CalcLiveRangeUtilSetBase; +using CalcLiveRangeUtilSetBase = + CalcLiveRangeUtilBase<CalcLiveRangeUtilSet, LiveRange::SegmentSet::iterator, + LiveRange::SegmentSet>; class CalcLiveRangeUtilSet : public CalcLiveRangeUtilSetBase { public: @@ -327,7 +342,8 @@ private: return I; } }; -} // namespace + +} // end anonymous namespace //===----------------------------------------------------------------------===// // LiveRange methods @@ -444,7 +460,7 @@ bool LiveRange::overlaps(const LiveRange &Other, const CoalescerPair &CP, if (J == JE) return false; - for (;;) { + while (true) { // J has just been advanced to satisfy: assert(J->end >= I->start); // Check for an overlap. 
@@ -865,7 +881,6 @@ void LiveInterval::clearSubRanges() { void LiveInterval::refineSubRanges(BumpPtrAllocator &Allocator, LaneBitmask LaneMask, std::function<void(LiveInterval::SubRange&)> Apply) { - LaneBitmask ToApply = LaneMask; for (SubRange &SR : subranges()) { LaneBitmask SRMask = SR.LaneMask; @@ -925,8 +940,8 @@ void LiveInterval::computeSubRangeUndefs(SmallVectorImpl<SlotIndex> &Undefs, } } -raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange::Segment &S) { - return os << '[' << S.start << ',' << S.end << ':' << S.valno->id << ')'; +raw_ostream& llvm::operator<<(raw_ostream& OS, const LiveRange::Segment &S) { + return OS << '[' << S.start << ',' << S.end << ':' << S.valno->id << ')'; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -971,7 +986,7 @@ void LiveInterval::SubRange::print(raw_ostream &OS) const { } void LiveInterval::print(raw_ostream &OS) const { - OS << PrintReg(reg) << ' '; + OS << printReg(reg) << ' '; super::print(OS); // Print subranges for (const SubRange &SR : subranges()) @@ -1033,7 +1048,6 @@ void LiveInterval::verify(const MachineRegisterInfo *MRI) const { } #endif - //===----------------------------------------------------------------------===// // LiveRangeUpdater class //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp index b3248e53d0a5..3e742a6c2f21 100644 --- a/lib/CodeGen/LiveIntervalUnion.cpp +++ b/lib/CodeGen/LiveIntervalUnion.cpp @@ -17,8 +17,8 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SparseBitVector.h" #include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetRegisterInfo.h" #include <cassert> #include <cstdlib> @@ -87,7 +87,7 @@ LiveIntervalUnion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { } for (LiveSegments::const_iterator SI = Segments.begin(); SI.valid(); ++SI) { OS << 
" [" << SI.start() << ' ' << SI.stop() << "):" - << PrintReg(SI.value()->reg, TRI); + << printReg(SI.value()->reg, TRI); } OS << '\n'; } diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervals.cpp index 0e240f482a19..79fdba7e062a 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervals.cpp @@ -1,4 +1,4 @@ -//===- LiveIntervalAnalysis.cpp - Live Interval Analysis ------------------===// +//===- LiveIntervals.cpp - Live Interval Analysis -------------------------===// // // The LLVM Compiler Infrastructure // @@ -14,7 +14,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "LiveRangeCalc.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DepthFirstIterator.h" @@ -34,6 +34,8 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" @@ -44,8 +46,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -157,7 +157,7 @@ void LiveIntervals::print(raw_ostream &OS, const Module* ) const { // Dump the regunits. for (unsigned Unit = 0, UnitE = RegUnitRanges.size(); Unit != UnitE; ++Unit) if (LiveRange *LR = RegUnitRanges[Unit]) - OS << PrintRegUnit(Unit, TRI) << ' ' << *LR << '\n'; + OS << printRegUnit(Unit, TRI) << ' ' << *LR << '\n'; // Dump the virtregs. 
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { @@ -323,7 +323,7 @@ void LiveIntervals::computeLiveInRegUnits() { // Create phi-defs at Begin for all live-in registers. SlotIndex Begin = Indexes->getMBBStartIdx(&MBB); - DEBUG(dbgs() << Begin << "\tBB#" << MBB.getNumber()); + DEBUG(dbgs() << Begin << "\t" << printMBBReference(MBB)); for (const auto &LI : MBB.liveins()) { for (MCRegUnitIterator Units(LI.PhysReg, TRI); Units.isValid(); ++Units) { unsigned Unit = *Units; @@ -335,7 +335,7 @@ void LiveIntervals::computeLiveInRegUnits() { } VNInfo *VNI = LR->createDeadDef(Begin, getVNInfoAllocator()); (void)VNI; - DEBUG(dbgs() << ' ' << PrintRegUnit(Unit, TRI) << '#' << VNI->id); + DEBUG(dbgs() << ' ' << printRegUnit(Unit, TRI) << '#' << VNI->id); } } DEBUG(dbgs() << '\n'); @@ -698,11 +698,11 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { // Check if any of the regunits are live beyond the end of RI. That could // happen when a physreg is defined as a copy of a virtreg: // - // %EAX = COPY %vreg5 - // FOO %vreg5 <--- MI, cancel kill because %EAX is live. - // BAR %EAX<kill> + // %eax = COPY %5 + // FOO %5 <--- MI, cancel kill because %eax is live. + // BAR killed %eax // - // There should be no kill flag on FOO when %vreg5 is rewritten as %EAX. + // There should be no kill flag on FOO when %5 is rewritten as %eax. for (auto &RUP : RU) { const LiveRange &RURange = *RUP.first; LiveRange::const_iterator &I = RUP.second; @@ -719,13 +719,13 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { // When reading a partial undefined value we must not add a kill flag. // The regalloc might have used the undef lane for something else. // Example: - // %vreg1 = ... ; R32: %vreg1 - // %vreg2:high16 = ... 
; R64: %vreg2 - // = read %vreg2<kill> ; R64: %vreg2 - // = read %vreg1 ; R32: %vreg1 - // The <kill> flag is correct for %vreg2, but the register allocator may - // assign R0L to %vreg1, and R0 to %vreg2 because the low 32bits of R0 - // are actually never written by %vreg2. After assignment the <kill> + // %1 = ... ; R32: %1 + // %2:high16 = ... ; R64: %2 + // = read killed %2 ; R64: %2 + // = read %1 ; R32: %1 + // The <kill> flag is correct for %2, but the register allocator may + // assign R0L to %1, and R0 to %2 because the low 32bits of R0 + // are actually never written by %2. After assignment the <kill> // flag at the read instruction is invalid. LaneBitmask DefinedLanesMask; if (!SRs.empty()) { @@ -824,7 +824,13 @@ LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const { float LiveIntervals::getSpillWeight(bool isDef, bool isUse, const MachineBlockFrequencyInfo *MBFI, const MachineInstr &MI) { - BlockFrequency Freq = MBFI->getBlockFreq(MI.getParent()); + return getSpillWeight(isDef, isUse, MBFI, MI.getParent()); +} + +float LiveIntervals::getSpillWeight(bool isDef, bool isUse, + const MachineBlockFrequencyInfo *MBFI, + const MachineBasicBlock *MBB) { + BlockFrequency Freq = MBFI->getBlockFreq(MBB); const float Scale = 1.0f / MBFI->getEntryFreq(); return (isDef + isUse) * (Freq.getFrequency() * Scale); } @@ -989,11 +995,11 @@ private: DEBUG({ dbgs() << " "; if (TargetRegisterInfo::isVirtualRegister(Reg)) { - dbgs() << PrintReg(Reg); + dbgs() << printReg(Reg); if (LaneMask.any()) dbgs() << " L" << PrintLaneMask(LaneMask); } else { - dbgs() << PrintRegUnit(Reg, &TRI); + dbgs() << printRegUnit(Reg, &TRI); } dbgs() << ":\t" << LR << '\n'; }); diff --git a/lib/CodeGen/LivePhysRegs.cpp b/lib/CodeGen/LivePhysRegs.cpp index cde6ccd29dfd..f4b43a9b8ead 100644 --- a/lib/CodeGen/LivePhysRegs.cpp +++ b/lib/CodeGen/LivePhysRegs.cpp @@ -40,10 +40,8 @@ void LivePhysRegs::removeRegsInMask(const MachineOperand &MO, } } -/// Simulates liveness when 
stepping backwards over an instruction(bundle): -/// Remove Defs, add uses. This is the recommended way of calculating liveness. -void LivePhysRegs::stepBackward(const MachineInstr &MI) { - // Remove defined registers and regmask kills from the set. +/// Remove defined registers and regmask kills from the set. +void LivePhysRegs::removeDefs(const MachineInstr &MI) { for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { if (O->isReg()) { if (!O->isDef()) @@ -55,8 +53,10 @@ void LivePhysRegs::stepBackward(const MachineInstr &MI) { } else if (O->isRegMask()) removeRegsInMask(*O); } +} - // Add uses to the set. +/// Add uses to the set. +void LivePhysRegs::addUses(const MachineInstr &MI) { for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { if (!O->isReg() || !O->readsReg()) continue; @@ -67,6 +67,16 @@ void LivePhysRegs::stepBackward(const MachineInstr &MI) { } } +/// Simulates liveness when stepping backwards over an instruction(bundle): +/// Remove Defs, add uses. This is the recommended way of calculating liveness. +void LivePhysRegs::stepBackward(const MachineInstr &MI) { + // Remove defined registers and regmask kills from the set. + removeDefs(MI); + + // Add uses to the set. + addUses(MI); +} + /// Simulates liveness when stepping forward over an instruction(bundle): Remove /// killed-uses, add defs. This is the not recommended way, because it depends /// on accurate kill flags. If possible use stepBackward() instead of this @@ -116,7 +126,7 @@ void LivePhysRegs::print(raw_ostream &OS) const { } for (const_iterator I = begin(), E = end(); I != E; ++I) - OS << " " << PrintReg(*I, TRI); + OS << " " << printReg(*I, TRI); OS << "\n"; } @@ -166,17 +176,32 @@ static void addCalleeSavedRegs(LivePhysRegs &LiveRegs, LiveRegs.addReg(*CSR); } -/// Adds pristine registers to the given \p LiveRegs. Pristine registers are -/// callee saved registers that are unused in the function. 
-static void addPristines(LivePhysRegs &LiveRegs, const MachineFunction &MF) { +void LivePhysRegs::addPristines(const MachineFunction &MF) { const MachineFrameInfo &MFI = MF.getFrameInfo(); if (!MFI.isCalleeSavedInfoValid()) return; + /// This function will usually be called on an empty object, handle this + /// as a special case. + if (empty()) { + /// Add all callee saved regs, then remove the ones that are saved and + /// restored. + addCalleeSavedRegs(*this, MF); + /// Remove the ones that are not saved/restored; they are pristine. + for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) + removeReg(Info.getReg()); + return; + } + /// If a callee-saved register that is not pristine is already present + /// in the set, we should make sure that it stays in it. Precompute the + /// set of pristine registers in a separate object. /// Add all callee saved regs, then remove the ones that are saved+restored. - addCalleeSavedRegs(LiveRegs, MF); + LivePhysRegs Pristine(*TRI); + addCalleeSavedRegs(Pristine, MF); /// Remove the ones that are not saved/restored; they are pristine. 
for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) - LiveRegs.removeReg(Info.getReg()); + Pristine.removeReg(Info.getReg()); + for (MCPhysReg R : Pristine) + addReg(R); } void LivePhysRegs::addLiveOutsNoPristines(const MachineBasicBlock &MBB) { @@ -192,7 +217,8 @@ void LivePhysRegs::addLiveOutsNoPristines(const MachineBasicBlock &MBB) { const MachineFrameInfo &MFI = MF.getFrameInfo(); if (MFI.isCalleeSavedInfoValid()) { for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) - addReg(Info.getReg()); + if (Info.isRestored()) + addReg(Info.getReg()); } } } @@ -200,7 +226,7 @@ void LivePhysRegs::addLiveOutsNoPristines(const MachineBasicBlock &MBB) { void LivePhysRegs::addLiveOuts(const MachineBasicBlock &MBB) { const MachineFunction &MF = *MBB.getParent(); if (!MBB.succ_empty()) { - addPristines(*this, MF); + addPristines(MF); addLiveOutsNoPristines(MBB); } else if (MBB.isReturnBlock()) { // For the return block: Add all callee saved registers. @@ -212,21 +238,27 @@ void LivePhysRegs::addLiveOuts(const MachineBasicBlock &MBB) { void LivePhysRegs::addLiveIns(const MachineBasicBlock &MBB) { const MachineFunction &MF = *MBB.getParent(); - addPristines(*this, MF); + addPristines(MF); addBlockLiveIns(MBB); } void llvm::computeLiveIns(LivePhysRegs &LiveRegs, - const MachineRegisterInfo &MRI, - MachineBasicBlock &MBB) { + const MachineBasicBlock &MBB) { + const MachineFunction &MF = *MBB.getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); - assert(MBB.livein_empty()); LiveRegs.init(TRI); LiveRegs.addLiveOutsNoPristines(MBB); - for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) + for (const MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) LiveRegs.stepBackward(MI); +} - for (unsigned Reg : LiveRegs) { +void llvm::addLiveIns(MachineBasicBlock &MBB, const LivePhysRegs &LiveRegs) { + assert(MBB.livein_empty() && "Expected empty live-in list"); + const MachineFunction &MF 
= *MBB.getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); + for (MCPhysReg Reg : LiveRegs) { if (MRI.isReserved(Reg)) continue; // Skip the register if we are about to add one of its super registers. @@ -242,3 +274,56 @@ void llvm::computeLiveIns(LivePhysRegs &LiveRegs, MBB.addLiveIn(Reg); } } + +void llvm::recomputeLivenessFlags(MachineBasicBlock &MBB) { + const MachineFunction &MF = *MBB.getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); + + // We walk through the block backwards and start with the live outs. + LivePhysRegs LiveRegs; + LiveRegs.init(TRI); + LiveRegs.addLiveOutsNoPristines(MBB); + + for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) { + // Recompute dead flags. + for (MIBundleOperands MO(MI); MO.isValid(); ++MO) { + if (!MO->isReg() || !MO->isDef() || MO->isDebug()) + continue; + + unsigned Reg = MO->getReg(); + if (Reg == 0) + continue; + assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + + bool IsNotLive = LiveRegs.available(MRI, Reg); + MO->setIsDead(IsNotLive); + } + + // Step backward over defs. + LiveRegs.removeDefs(MI); + + // Recompute kill flags. + for (MIBundleOperands MO(MI); MO.isValid(); ++MO) { + if (!MO->isReg() || !MO->readsReg() || MO->isDebug()) + continue; + + unsigned Reg = MO->getReg(); + if (Reg == 0) + continue; + assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + + bool IsNotLive = LiveRegs.available(MRI, Reg); + MO->setIsKill(IsNotLive); + } + + // Complete the stepbackward. 
+ LiveRegs.addUses(MI); + } +} + +void llvm::computeAndAddLiveIns(LivePhysRegs &LiveRegs, + MachineBasicBlock &MBB) { + computeLiveIns(LiveRegs, MBB); + addLiveIns(MBB, LiveRegs); +} diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp index 8c43c9f3f884..66c23b7b69ce 100644 --- a/lib/CodeGen/LiveRangeCalc.cpp +++ b/lib/CodeGen/LiveRangeCalc.cpp @@ -1,4 +1,4 @@ -//===---- LiveRangeCalc.cpp - Calculate live ranges -----------------------===// +//===- LiveRangeCalc.cpp - Calculate live ranges --------------------------===// // // The LLVM Compiler Infrastructure // @@ -12,9 +12,27 @@ //===----------------------------------------------------------------------===// #include "LiveRangeCalc.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/MC/LaneBitmask.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <iterator> +#include <tuple> +#include <utility> using namespace llvm; @@ -44,7 +62,6 @@ void LiveRangeCalc::reset(const MachineFunction *mf, LiveIn.clear(); } - static void createDeadDef(SlotIndexes &Indexes, VNInfo::Allocator &Alloc, LiveRange &LR, const MachineOperand &MO) { const MachineInstr &MI = *MO.getParent(); @@ -136,7 +153,6 @@ void LiveRangeCalc::createDeadDefs(LiveRange &LR, unsigned Reg) { createDeadDef(*Indexes, *Alloc, LR, MO); } - void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask Mask, LiveInterval *LI) { SmallVector<SlotIndex, 4> Undefs; @@ -148,7 
+164,7 @@ void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask Mask, const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo(); for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) { // Clear all kill flags. They will be reinserted after register allocation - // by LiveIntervalAnalysis::addKillFlags(). + // by LiveIntervals::addKillFlags(). if (MO.isUse()) MO.setIsKill(false); // MO::readsReg returns "true" for subregister defs. This is for keeping @@ -197,7 +213,6 @@ void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask Mask, } } - void LiveRangeCalc::updateFromLiveIns() { LiveRangeUpdater Updater; for (const LiveInBlock &I : LiveIn) { @@ -248,7 +263,6 @@ void LiveRangeCalc::extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg, calculateValues(); } - // This function is called by a client after using the low-level API to add // live-out and live-in blocks. The unique value optimization is not // available, SplitEditor::transferValues handles that case directly anyway. 
@@ -259,7 +273,6 @@ void LiveRangeCalc::calculateValues() { updateFromLiveIns(); } - bool LiveRangeCalc::isDefOnEntry(LiveRange &LR, ArrayRef<SlotIndex> Undefs, MachineBasicBlock &MBB, BitVector &DefOnEntry, BitVector &UndefOnEntry) { @@ -351,7 +364,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, #ifndef NDEBUG if (MBB->pred_empty()) { MBB->getParent()->verify(); - errs() << "Use of " << PrintReg(PhysReg) + errs() << "Use of " << printReg(PhysReg) << " does not have a corresponding definition on every path:\n"; const MachineInstr *MI = Indexes->getInstructionFromIndex(Use); if (MI != nullptr) @@ -363,8 +376,8 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, !MBB->isLiveIn(PhysReg)) { MBB->getParent()->verify(); const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); - errs() << "The register " << PrintReg(PhysReg, TRI) - << " needs to be live in to BB#" << MBB->getNumber() + errs() << "The register " << printReg(PhysReg, TRI) + << " needs to be live in to " << printMBBReference(*MBB) << ", but is missing from the live-in list.\n"; report_fatal_error("Invalid global physical register"); } @@ -410,7 +423,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, LiveIn.clear(); FoundUndef |= (TheVNI == nullptr || TheVNI == &UndefVNI); - if (Undefs.size() > 0 && FoundUndef) + if (!Undefs.empty() && FoundUndef) UniqueVNI = false; // Both updateSSA() and LiveRangeUpdater benefit from ordered blocks, but @@ -454,7 +467,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, LiveIn.reserve(WorkList.size()); for (unsigned BN : WorkList) { MachineBasicBlock *MBB = MF->getBlockNumbered(BN); - if (Undefs.size() > 0 && + if (!Undefs.empty() && !isDefOnEntry(LR, Undefs, *MBB, DefOnEntry, UndefOnEntry)) continue; addLiveInBlock(LR, DomTree->getNode(MBB)); @@ -465,7 +478,6 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, 
return false; } - // This is essentially the same iterative algorithm that SSAUpdater uses, // except we already have a dominator tree, so we don't have to recompute it. void LiveRangeCalc::updateSSA() { diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h index d41b782d9bdf..c4914f23f56d 100644 --- a/lib/CodeGen/LiveRangeCalc.h +++ b/lib/CodeGen/LiveRangeCalc.h @@ -1,4 +1,4 @@ -//===---- LiveRangeCalc.h - Calculate live ranges ---------------*- C++ -*-===// +//===- LiveRangeCalc.h - Calculate live ranges ------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -26,28 +26,35 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/MC/LaneBitmask.h" +#include <utility> namespace llvm { -/// Forward declarations for MachineDominators.h: -class MachineDominatorTree; template <class NodeT> class DomTreeNodeBase; -typedef DomTreeNodeBase<MachineBasicBlock> MachineDomTreeNode; +class MachineDominatorTree; +class MachineFunction; +class MachineRegisterInfo; + +using MachineDomTreeNode = DomTreeNodeBase<MachineBasicBlock>; class LiveRangeCalc { - const MachineFunction *MF; - const MachineRegisterInfo *MRI; - SlotIndexes *Indexes; - MachineDominatorTree *DomTree; - VNInfo::Allocator *Alloc; + const MachineFunction *MF = nullptr; + const MachineRegisterInfo *MRI = nullptr; + SlotIndexes *Indexes = nullptr; + MachineDominatorTree *DomTree = nullptr; + VNInfo::Allocator *Alloc = nullptr; /// LiveOutPair - A value and the block that defined it. The domtree node is /// redundant, it can be computed as: MDT[Indexes.getMBBFromIndex(VNI->def)]. 
- typedef std::pair<VNInfo*, MachineDomTreeNode*> LiveOutPair; + using LiveOutPair = std::pair<VNInfo *, MachineDomTreeNode *>; /// LiveOutMap - Map basic blocks to the value leaving the block. - typedef IndexedMap<LiveOutPair, MBB2NumberFunctor> LiveOutMap; + using LiveOutMap = IndexedMap<LiveOutPair, MBB2NumberFunctor>; /// Bit vector of active entries in LiveOut, also used as a visited set by /// findReachingDefs. One entry per basic block, indexed by block number. @@ -66,7 +73,7 @@ class LiveRangeCalc { /// registers do not overlap), but the defined/undefined information must /// be kept separate for each individual range. /// By convention, EntryInfoMap[&LR] = { Defined, Undefined }. - typedef DenseMap<LiveRange*,std::pair<BitVector,BitVector>> EntryInfoMap; + using EntryInfoMap = DenseMap<LiveRange *, std::pair<BitVector, BitVector>>; EntryInfoMap EntryInfos; /// Map each basic block where a live range is live out to the live-out value @@ -105,10 +112,10 @@ class LiveRangeCalc { SlotIndex Kill; // Live-in value filled in by updateSSA once it is known. - VNInfo *Value; + VNInfo *Value = nullptr; LiveInBlock(LiveRange &LR, MachineDomTreeNode *node, SlotIndex kill) - : LR(LR), DomNode(node), Kill(kill), Value(nullptr) {} + : LR(LR), DomNode(node), Kill(kill) {} }; /// LiveIn - Work list of blocks where the live-in value has yet to be @@ -171,8 +178,7 @@ class LiveRangeCalc { void resetLiveOutMap(); public: - LiveRangeCalc() : MF(nullptr), MRI(nullptr), Indexes(nullptr), - DomTree(nullptr), Alloc(nullptr) {} + LiveRangeCalc() = default; //===--------------------------------------------------------------------===// // High-level interface. @@ -186,10 +192,8 @@ public: /// that may overlap a previously computed live range, and before the first /// live range in a function. If live ranges are not known to be /// non-overlapping, call reset before each. 
- void reset(const MachineFunction *MF, - SlotIndexes*, - MachineDominatorTree*, - VNInfo::Allocator*); + void reset(const MachineFunction *mf, SlotIndexes *SI, + MachineDominatorTree *MDT, VNInfo::Allocator *VNIA); //===--------------------------------------------------------------------===// // Mid-level interface. @@ -282,4 +286,4 @@ public: } // end namespace llvm -#endif +#endif // LLVM_LIB_CODEGEN_LIVERANGECALC_H diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index 92cca1a54951..86cfbd87f5b1 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -14,12 +14,12 @@ #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/CalcSpillWeights.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" using namespace llvm; @@ -465,7 +465,7 @@ LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF, if (MRI.recomputeRegClass(LI.reg)) DEBUG({ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - dbgs() << "Inflated " << PrintReg(LI.reg) << " to " + dbgs() << "Inflated " << printReg(LI.reg) << " to " << TRI->getRegClassName(MRI.getRegClass(LI.reg)) << '\n'; }); VRAI.calculateSpillWeightAndHint(LI); diff --git a/lib/CodeGen/LiveRangeShrink.cpp b/lib/CodeGen/LiveRangeShrink.cpp index 552f4b5393fe..02e1f3b01ade 100644 --- a/lib/CodeGen/LiveRangeShrink.cpp +++ b/lib/CodeGen/LiveRangeShrink.cpp @@ -1,4 +1,4 @@ -//===-- LiveRangeShrink.cpp - Move instructions to shrink live range ------===// +//===- LiveRangeShrink.cpp - Move instructions to shrink live range -------===// // // The LLVM Compiler Infrastructure // @@ -14,20 +14,32 @@ /// uses, all of which are the only use of the def. 
/// ///===---------------------------------------------------------------------===// + +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/Pass.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include <iterator> +#include <utility> + +using namespace llvm; #define DEBUG_TYPE "lrshrink" STATISTIC(NumInstrsHoistedToShrinkLiveRange, "Number of insructions hoisted to shrink live range."); -using namespace llvm; - namespace { + class LiveRangeShrink : public MachineFunctionPass { public: static char ID; @@ -45,23 +57,26 @@ public: bool runOnMachineFunction(MachineFunction &MF) override; }; -} // End anonymous namespace. + +} // end anonymous namespace char LiveRangeShrink::ID = 0; + char &llvm::LiveRangeShrinkID = LiveRangeShrink::ID; INITIALIZE_PASS(LiveRangeShrink, "lrshrink", "Live Range Shrink Pass", false, false) -namespace { -typedef DenseMap<MachineInstr *, unsigned> InstOrderMap; + +using InstOrderMap = DenseMap<MachineInstr *, unsigned>; /// Returns \p New if it's dominated by \p Old, otherwise return \p Old. /// \p M maintains a map from instruction to its dominating order that satisfies /// M[A] > M[B] guarantees that A is dominated by B. /// If \p New is not in \p M, return \p Old. Otherwise if \p Old is null, return /// \p New. 
-MachineInstr *FindDominatedInstruction(MachineInstr &New, MachineInstr *Old, - const InstOrderMap &M) { +static MachineInstr *FindDominatedInstruction(MachineInstr &New, + MachineInstr *Old, + const InstOrderMap &M) { auto NewIter = M.find(&New); if (NewIter == M.end()) return Old; @@ -82,16 +97,16 @@ MachineInstr *FindDominatedInstruction(MachineInstr &New, MachineInstr *Old, /// Builds Instruction to its dominating order number map \p M by traversing /// from instruction \p Start. -void BuildInstOrderMap(MachineBasicBlock::iterator Start, InstOrderMap &M) { +static void BuildInstOrderMap(MachineBasicBlock::iterator Start, + InstOrderMap &M) { M.clear(); unsigned i = 0; for (MachineInstr &I : make_range(Start, Start->getParent()->end())) M[&I] = i++; } -} // end anonymous namespace bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; MachineRegisterInfo &MRI = MF.getRegInfo(); diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp index 60033db38ee4..bd435968296d 100644 --- a/lib/CodeGen/LiveRegMatrix.cpp +++ b/lib/CodeGen/LiveRegMatrix.cpp @@ -15,17 +15,17 @@ #include "RegisterCoalescer.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveIntervalUnion.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <cassert> using namespace llvm; @@ -102,14 +102,14 @@ static bool foreachUnit(const TargetRegisterInfo *TRI, } void 
LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) { - DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI) - << " to " << PrintReg(PhysReg, TRI) << ':'); + DEBUG(dbgs() << "assigning " << printReg(VirtReg.reg, TRI) + << " to " << printReg(PhysReg, TRI) << ':'); assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment"); VRM->assignVirt2Phys(VirtReg.reg, PhysReg); foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit, const LiveRange &Range) { - DEBUG(dbgs() << ' ' << PrintRegUnit(Unit, TRI) << ' ' << Range); + DEBUG(dbgs() << ' ' << printRegUnit(Unit, TRI) << ' ' << Range); Matrix[Unit].unify(VirtReg, Range); return false; }); @@ -120,13 +120,13 @@ void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) { void LiveRegMatrix::unassign(LiveInterval &VirtReg) { unsigned PhysReg = VRM->getPhys(VirtReg.reg); - DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI) - << " from " << PrintReg(PhysReg, TRI) << ':'); + DEBUG(dbgs() << "unassigning " << printReg(VirtReg.reg, TRI) + << " from " << printReg(PhysReg, TRI) << ':'); VRM->clearVirt(VirtReg.reg); foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit, const LiveRange &Range) { - DEBUG(dbgs() << ' ' << PrintRegUnit(Unit, TRI)); + DEBUG(dbgs() << ' ' << printRegUnit(Unit, TRI)); Matrix[Unit].extract(VirtReg, Range); return false; }); diff --git a/lib/CodeGen/LiveRegUnits.cpp b/lib/CodeGen/LiveRegUnits.cpp index f9ba4ffa6527..9f28db6287ba 100644 --- a/lib/CodeGen/LiveRegUnits.cpp +++ b/lib/CodeGen/LiveRegUnits.cpp @@ -19,8 +19,8 @@ #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/MC/MCRegisterInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; @@ -97,23 +97,37 @@ static void addCalleeSavedRegs(LiveRegUnits &LiveUnits, LiveUnits.addReg(*CSR); } -/// Adds pristine registers to the given \p LiveUnits. 
Pristine registers are -/// callee saved registers that are unused in the function. -static void addPristines(LiveRegUnits &LiveUnits, const MachineFunction &MF) { +void LiveRegUnits::addPristines(const MachineFunction &MF) { const MachineFrameInfo &MFI = MF.getFrameInfo(); if (!MFI.isCalleeSavedInfoValid()) return; + /// This function will usually be called on an empty object, handle this + /// as a special case. + if (empty()) { + /// Add all callee saved regs, then remove the ones that are saved and + /// restored. + addCalleeSavedRegs(*this, MF); + /// Remove the ones that are not saved/restored; they are pristine. + for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) + removeReg(Info.getReg()); + return; + } + /// If a callee-saved register that is not pristine is already present + /// in the set, we should make sure that it stays in it. Precompute the + /// set of pristine registers in a separate object. /// Add all callee saved regs, then remove the ones that are saved+restored. - addCalleeSavedRegs(LiveUnits, MF); + LiveRegUnits Pristine(*TRI); + addCalleeSavedRegs(Pristine, MF); /// Remove the ones that are not saved/restored; they are pristine. for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) - LiveUnits.removeReg(Info.getReg()); + Pristine.removeReg(Info.getReg()); + addUnits(Pristine.getBitVector()); } void LiveRegUnits::addLiveOuts(const MachineBasicBlock &MBB) { const MachineFunction &MF = *MBB.getParent(); if (!MBB.succ_empty()) { - addPristines(*this, MF); + addPristines(MF); // To get the live-outs we simply merge the live-ins of all successors. 
for (const MachineBasicBlock *Succ : MBB.successors()) addBlockLiveIns(*this, *Succ); @@ -127,6 +141,6 @@ void LiveRegUnits::addLiveOuts(const MachineBasicBlock &MBB) { void LiveRegUnits::addLiveIns(const MachineBasicBlock &MBB) { const MachineFunction &MF = *MBB.getParent(); - addPristines(*this, MF); + addPristines(MF); addBlockLiveIns(*this, MBB); } diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp index b51f8b0aa6bb..b0e58b0e3e5f 100644 --- a/lib/CodeGen/LiveStackAnalysis.cpp +++ b/lib/CodeGen/LiveStackAnalysis.cpp @@ -14,12 +14,12 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/LiveStackAnalysis.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "livestacks" diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index a9aec926115a..032dd66ae1d2 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -37,7 +37,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include <algorithm> using namespace llvm; @@ -235,7 +234,7 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr &MI) { // Otherwise, the last sub-register def implicitly defines this register. // e.g. // AH = - // AL = ... <imp-def EAX>, <imp-kill AH> + // AL = ... implicit-def EAX, implicit killed AH // = AH // ... 
// = EAX @@ -321,17 +320,17 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { // AH = // // = AX - // = AL, AX<imp-use, kill> + // = AL, implicit killed AX // AX = // // Or whole register is defined, but not used at all. - // AX<dead> = + // dead AX = // ... // AX = // // Or whole register is defined, but only partly used. - // AX<dead> = AL<imp-def> - // = AL<kill> + // dead AX = implicit-def AL + // = killed AL // AX = MachineInstr *LastPartDef = nullptr; unsigned LastPartDefDist = 0; @@ -364,7 +363,7 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { if (!PhysRegUse[Reg]) { // Partial uses. Mark register def dead and add implicit def of // sub-registers which are used. - // EAX<dead> = op AL<imp-def> + // dead EAX = op implicit-def AL // That is, EAX def is dead but AL def extends pass it. PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true); for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp index b109f1922a3e..c0da37ede849 100644 --- a/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -14,29 +14,30 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/StackProtector.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Intrinsics.h" -#include 
"llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <tuple> using namespace llvm; @@ -47,6 +48,7 @@ STATISTIC(NumBaseRegisters, "Number of virtual frame base registers allocated"); STATISTIC(NumReplacements, "Number of frame indices references replaced"); namespace { + class FrameRef { MachineBasicBlock::iterator MI; // Instr referencing the frame int64_t LocalOffset; // Local offset of the frame idx referenced @@ -72,9 +74,10 @@ namespace { }; class LocalStackSlotPass: public MachineFunctionPass { - SmallVector<int64_t,16> LocalOffsets; + SmallVector<int64_t, 16> LocalOffsets; + /// StackObjSet - A set of stack object indexes - typedef SmallSetVector<int, 8> StackObjSet; + using StackObjSet = SmallSetVector<int, 8>; void AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx, int64_t &Offset, bool StackGrowsDown, unsigned &MaxAlign); @@ -84,11 +87,14 @@ namespace { int64_t &Offset, unsigned &MaxAlign); void calculateFrameObjectOffsets(MachineFunction &Fn); bool insertFrameReferenceRegisters(MachineFunction &Fn); + public: static char ID; // Pass identification, replacement for typeid + explicit LocalStackSlotPass() : MachineFunctionPass(ID) { initializeLocalStackSlotPassPass(*PassRegistry::getPassRegistry()); } + bool runOnMachineFunction(MachineFunction &MF) override; void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -96,20 +102,20 @@ namespace { AU.addRequired<StackProtector>(); MachineFunctionPass::getAnalysisUsage(AU); } - - private: }; 
+ } // end anonymous namespace char LocalStackSlotPass::ID = 0; + char &llvm::LocalStackSlotAllocationID = LocalStackSlotPass::ID; + INITIALIZE_PASS_BEGIN(LocalStackSlotPass, DEBUG_TYPE, "Local Stack Slot Allocation", false, false) INITIALIZE_PASS_DEPENDENCY(StackProtector) INITIALIZE_PASS_END(LocalStackSlotPass, DEBUG_TYPE, "Local Stack Slot Allocation", false, false) - bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) { MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); @@ -178,7 +184,6 @@ void LocalStackSlotPass::AssignProtectedObjSet(const StackObjSet &UnassignedObjs MachineFrameInfo &MFI, bool StackGrowsDown, int64_t &Offset, unsigned &MaxAlign) { - for (StackObjSet::const_iterator I = UnassignedObjs.begin(), E = UnassignedObjs.end(); I != E; ++I) { int i = *I; @@ -189,7 +194,6 @@ void LocalStackSlotPass::AssignProtectedObjSet(const StackObjSet &UnassignedObjs /// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the /// abstract stack objects. -/// void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { // Loop over all of the stack objects, assigning sequential addresses... 
MachineFrameInfo &MFI = Fn.getFrameInfo(); @@ -397,7 +401,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { continue; } - const MachineFunction *MF = MI.getParent()->getParent(); + const MachineFunction *MF = MI.getMF(); const TargetRegisterClass *RC = TRI->getPointerRegClass(*MF); BaseReg = Fn.getRegInfo().createVirtualRegister(RC); diff --git a/lib/CodeGen/LowerEmuTLS.cpp b/lib/CodeGen/LowerEmuTLS.cpp index 0fc48d4e0b6b..0cf578b50563 100644 --- a/lib/CodeGen/LowerEmuTLS.cpp +++ b/lib/CodeGen/LowerEmuTLS.cpp @@ -16,11 +16,11 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" -#include "llvm/Target/TargetLowering.h" using namespace llvm; diff --git a/lib/CodeGen/MIRCanonicalizerPass.cpp b/lib/CodeGen/MIRCanonicalizerPass.cpp new file mode 100644 index 000000000000..4b676a60a8cd --- /dev/null +++ b/lib/CodeGen/MIRCanonicalizerPass.cpp @@ -0,0 +1,625 @@ +//===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The purpose of this pass is to employ a canonical code transformation so +// that code compiled with slightly different IR passes can be diffed more +// effectively than otherwise. This is done by renaming vregs in a given +// LiveRange in a canonical way. This pass also does a pseudo-scheduling to +// move defs closer to their use inorder to reduce diffs caused by slightly +// different schedules. +// +// Basic Usage: +// +// llc -o - -run-pass mir-canonicalizer example.mir +// +// Reorders instructions canonically. +// Renames virtual register operands canonically. 
+// Strips certain MIR artifacts (optionally). +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/raw_ostream.h" + +#include <queue> + +using namespace llvm; + +namespace llvm { +extern char &MIRCanonicalizerID; +} // namespace llvm + +#define DEBUG_TYPE "mir-canonicalizer" + +static cl::opt<unsigned> +CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u), + cl::value_desc("N"), + cl::desc("Function number to canonicalize.")); + +static cl::opt<unsigned> +CanonicalizeBasicBlockNumber("canon-nth-basicblock", cl::Hidden, cl::init(~0u), + cl::value_desc("N"), + cl::desc("BasicBlock number to canonicalize.")); + +namespace { + +class MIRCanonicalizer : public MachineFunctionPass { +public: + static char ID; + MIRCanonicalizer() : MachineFunctionPass(ID) {} + + StringRef getPassName() const override { + return "Rename register operands in a canonical ordering."; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; + +} // end anonymous namespace + +enum VRType { RSE_Reg = 0, RSE_FrameIndex, RSE_NewCandidate }; +class TypedVReg { + VRType type; + unsigned reg; + +public: + TypedVReg(unsigned reg) : type(RSE_Reg), reg(reg) {} + TypedVReg(VRType type) : type(type), reg(~0U) { + assert(type != RSE_Reg && "Expected a non-register type."); + } + + bool isReg() const { return type == RSE_Reg; } + bool isFrameIndex() const { return type == RSE_FrameIndex; } + bool isCandidate() const { return type == RSE_NewCandidate; } + + VRType getType() const { return type; } + unsigned getReg() const { + 
assert(this->isReg() && "Expected a virtual or physical register."); + return reg; + } +}; + +char MIRCanonicalizer::ID; + +char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID; + +INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer", + "Rename Register Operands Canonically", false, false) + +INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer", + "Rename Register Operands Canonically", false, false) + +static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) { + ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin()); + std::vector<MachineBasicBlock *> RPOList; + for (auto MBB : RPOT) { + RPOList.push_back(MBB); + } + + return RPOList; +} + +// Set a dummy vreg. We use this vregs register class to generate throw-away +// vregs that are used to skip vreg numbers so that vreg numbers line up. +static unsigned GetDummyVReg(const MachineFunction &MF) { + for (auto &MBB : MF) { + for (auto &MI : MBB) { + for (auto &MO : MI.operands()) { + if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; + return MO.getReg(); + } + } + } + + return ~0U; +} + +static bool rescheduleCanonically(MachineBasicBlock *MBB) { + + bool Changed = false; + + // Calculates the distance of MI from the begining of its parent BB. + auto getInstrIdx = [](const MachineInstr &MI) { + unsigned i = 0; + for (auto &CurMI : *MI.getParent()) { + if (&CurMI == &MI) + return i; + i++; + } + return ~0U; + }; + + // Pre-Populate vector of instructions to reschedule so that we don't + // clobber the iterator. 
+ std::vector<MachineInstr *> Instructions; + for (auto &MI : *MBB) { + Instructions.push_back(&MI); + } + + for (auto *II : Instructions) { + if (II->getNumOperands() == 0) + continue; + + MachineOperand &MO = II->getOperand(0); + if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; + + DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump();); + + MachineInstr *Def = II; + unsigned Distance = ~0U; + MachineInstr *UseToBringDefCloserTo = nullptr; + MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo(); + for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) { + MachineInstr *UseInst = UO.getParent(); + + const unsigned DefLoc = getInstrIdx(*Def); + const unsigned UseLoc = getInstrIdx(*UseInst); + const unsigned Delta = (UseLoc - DefLoc); + + if (UseInst->getParent() != Def->getParent()) + continue; + if (DefLoc >= UseLoc) + continue; + + if (Delta < Distance) { + Distance = Delta; + UseToBringDefCloserTo = UseInst; + } + } + + const auto BBE = MBB->instr_end(); + MachineBasicBlock::iterator DefI = BBE; + MachineBasicBlock::iterator UseI = BBE; + + for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) { + + if (DefI != BBE && UseI != BBE) + break; + + if ((&*BBI != Def) && (&*BBI != UseToBringDefCloserTo)) + continue; + + if (&*BBI == Def) { + DefI = BBI; + continue; + } + + if (&*BBI == UseToBringDefCloserTo) { + UseI = BBI; + continue; + } + } + + if (DefI == BBE || UseI == BBE) + continue; + + DEBUG({ + dbgs() << "Splicing "; + DefI->dump(); + dbgs() << " right before: "; + UseI->dump(); + }); + + Changed = true; + MBB->splice(UseI, MBB, DefI); + } + + return Changed; +} + +/// Here we find our candidates. What makes an interesting candidate? +/// An candidate for a canonicalization tree root is normally any kind of +/// instruction that causes side effects such as a store to memory or a copy to +/// a physical register or a return instruction. 
We use these as an expression +/// tree root that we walk inorder to build a canonical walk which should result +/// in canoncal vreg renaming. +static std::vector<MachineInstr *> populateCandidates(MachineBasicBlock *MBB) { + std::vector<MachineInstr *> Candidates; + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + + for (auto II = MBB->begin(), IE = MBB->end(); II != IE; ++II) { + MachineInstr *MI = &*II; + + bool DoesMISideEffect = false; + + if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg()) { + const unsigned Dst = MI->getOperand(0).getReg(); + DoesMISideEffect |= !TargetRegisterInfo::isVirtualRegister(Dst); + + for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) { + if (DoesMISideEffect) break; + DoesMISideEffect |= (UI->getParent()->getParent() != MI->getParent()); + } + } + + if (!MI->mayStore() && !MI->isBranch() && !DoesMISideEffect) + continue; + + DEBUG(dbgs() << "Found Candidate: "; MI->dump();); + Candidates.push_back(MI); + } + + return Candidates; +} + +static void doCandidateWalk(std::vector<TypedVReg> &VRegs, + std::queue<TypedVReg> &RegQueue, + std::vector<MachineInstr *> &VisitedMIs, + const MachineBasicBlock *MBB) { + + const MachineFunction &MF = *MBB->getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + + while (!RegQueue.empty()) { + + auto TReg = RegQueue.front(); + RegQueue.pop(); + + if (TReg.isFrameIndex()) { + DEBUG(dbgs() << "Popping frame index.\n";); + VRegs.push_back(TypedVReg(RSE_FrameIndex)); + continue; + } + + assert(TReg.isReg() && "Expected vreg or physreg."); + unsigned Reg = TReg.getReg(); + + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + DEBUG({ + dbgs() << "Popping vreg "; + MRI.def_begin(Reg)->dump(); + dbgs() << "\n"; + }); + + if (!llvm::any_of(VRegs, [&](const TypedVReg &TR) { + return TR.isReg() && TR.getReg() == Reg; + })) { + VRegs.push_back(TypedVReg(Reg)); + } + } else { + DEBUG(dbgs() << "Popping physreg.\n";); + VRegs.push_back(TypedVReg(Reg)); + continue; + } + 
+ for (auto RI = MRI.def_begin(Reg), RE = MRI.def_end(); RI != RE; ++RI) { + MachineInstr *Def = RI->getParent(); + + if (Def->getParent() != MBB) + continue; + + if (llvm::any_of(VisitedMIs, + [&](const MachineInstr *VMI) { return Def == VMI; })) { + break; + } + + DEBUG({ + dbgs() << "\n========================\n"; + dbgs() << "Visited MI: "; + Def->dump(); + dbgs() << "BB Name: " << Def->getParent()->getName() << "\n"; + dbgs() << "\n========================\n"; + }); + VisitedMIs.push_back(Def); + for (unsigned I = 1, E = Def->getNumOperands(); I != E; ++I) { + + MachineOperand &MO = Def->getOperand(I); + if (MO.isFI()) { + DEBUG(dbgs() << "Pushing frame index.\n";); + RegQueue.push(TypedVReg(RSE_FrameIndex)); + } + + if (!MO.isReg()) + continue; + RegQueue.push(TypedVReg(MO.getReg())); + } + } + } +} + +// TODO: Work to remove this in the future. One day when we have named vregs +// we should be able to form the canonical name based on some characteristic +// we see in that point of the expression tree (like if we were to name based +// on some sort of value numbering scheme). 
+static void SkipVRegs(unsigned &VRegGapIndex, MachineRegisterInfo &MRI, + const TargetRegisterClass *RC) { + const unsigned VR_GAP = (++VRegGapIndex * 1000); + + DEBUG({ + dbgs() << "Adjusting per-BB VR_GAP for BB" << VRegGapIndex << " to " + << VR_GAP << "\n"; + }); + + unsigned I = MRI.createVirtualRegister(RC); + const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP; + while (I != E) { + I = MRI.createVirtualRegister(RC); + } +} + +static std::map<unsigned, unsigned> +GetVRegRenameMap(const std::vector<TypedVReg> &VRegs, + const std::vector<unsigned> &renamedInOtherBB, + MachineRegisterInfo &MRI, + const TargetRegisterClass *RC) { + std::map<unsigned, unsigned> VRegRenameMap; + unsigned LastRenameReg = MRI.createVirtualRegister(RC); + bool FirstCandidate = true; + + for (auto &vreg : VRegs) { + if (vreg.isFrameIndex()) { + // We skip one vreg for any frame index because there is a good chance + // (especially when comparing SelectionDAG to GlobalISel generated MIR) + // that in the other file we are just getting an incoming vreg that comes + // from a copy from a frame index. So it's safe to skip by one. + LastRenameReg = MRI.createVirtualRegister(RC); + DEBUG(dbgs() << "Skipping rename for FI " << LastRenameReg << "\n";); + continue; + } else if (vreg.isCandidate()) { + + // After the first candidate, for every subsequent candidate, we skip mod + // 10 registers so that the candidates are more likely to start at the + // same vreg number making it more likely that the canonical walk from the + // candidate insruction. We don't need to skip from the first candidate of + // the BasicBlock because we already skip ahead several vregs for each BB. 
+ while (LastRenameReg % 10) { + if (!FirstCandidate) break; + LastRenameReg = MRI.createVirtualRegister(RC); + + DEBUG({ + dbgs() << "Skipping rename for new candidate " << LastRenameReg + << "\n"; + }); + } + FirstCandidate = false; + continue; + } else if (!TargetRegisterInfo::isVirtualRegister(vreg.getReg())) { + LastRenameReg = MRI.createVirtualRegister(RC); + DEBUG({ + dbgs() << "Skipping rename for Phys Reg " << LastRenameReg << "\n"; + }); + continue; + } + + auto Reg = vreg.getReg(); + if (llvm::find(renamedInOtherBB, Reg) != renamedInOtherBB.end()) { + DEBUG(dbgs() << "Vreg " << Reg << " already renamed in other BB.\n";); + continue; + } + + auto Rename = MRI.createVirtualRegister(MRI.getRegClass(Reg)); + LastRenameReg = Rename; + + if (VRegRenameMap.find(Reg) == VRegRenameMap.end()) { + DEBUG(dbgs() << "Mapping vreg ";); + if (MRI.reg_begin(Reg) != MRI.reg_end()) { + DEBUG(auto foo = &*MRI.reg_begin(Reg); foo->dump();); + } else { + DEBUG(dbgs() << Reg;); + } + DEBUG(dbgs() << " to ";); + if (MRI.reg_begin(Rename) != MRI.reg_end()) { + DEBUG(auto foo = &*MRI.reg_begin(Rename); foo->dump();); + } else { + DEBUG(dbgs() << Rename;); + } + DEBUG(dbgs() << "\n";); + + VRegRenameMap.insert(std::pair<unsigned, unsigned>(Reg, Rename)); + } + } + + return VRegRenameMap; +} + +static bool doVRegRenaming(std::vector<unsigned> &RenamedInOtherBB, + const std::map<unsigned, unsigned> &VRegRenameMap, + MachineRegisterInfo &MRI) { + bool Changed = false; + for (auto I = VRegRenameMap.begin(), E = VRegRenameMap.end(); I != E; ++I) { + + auto VReg = I->first; + auto Rename = I->second; + + RenamedInOtherBB.push_back(Rename); + + std::vector<MachineOperand *> RenameMOs; + for (auto &MO : MRI.reg_operands(VReg)) { + RenameMOs.push_back(&MO); + } + + for (auto *MO : RenameMOs) { + Changed = true; + MO->setReg(Rename); + + if (!MO->isDef()) + MO->setIsKill(false); + } + } + + return Changed; +} + +static bool doDefKillClear(MachineBasicBlock *MBB) { + bool Changed = false; + 
+ for (auto &MI : *MBB) { + for (auto &MO : MI.operands()) { + if (!MO.isReg()) + continue; + if (!MO.isDef() && MO.isKill()) { + Changed = true; + MO.setIsKill(false); + } + + if (MO.isDef() && MO.isDead()) { + Changed = true; + MO.setIsDead(false); + } + } + } + + return Changed; +} + +static bool runOnBasicBlock(MachineBasicBlock *MBB, + std::vector<StringRef> &bbNames, + std::vector<unsigned> &renamedInOtherBB, + unsigned &basicBlockNum, unsigned &VRegGapIndex) { + + if (CanonicalizeBasicBlockNumber != ~0U) { + if (CanonicalizeBasicBlockNumber != basicBlockNum++) + return false; + DEBUG(dbgs() << "\n Canonicalizing BasicBlock " << MBB->getName() << "\n";); + } + + if (llvm::find(bbNames, MBB->getName()) != bbNames.end()) { + DEBUG({ + dbgs() << "Found potentially duplicate BasicBlocks: " << MBB->getName() + << "\n"; + }); + return false; + } + + DEBUG({ + dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << " \n\n"; + dbgs() << "\n\n================================================\n\n"; + }); + + bool Changed = false; + MachineFunction &MF = *MBB->getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + const unsigned DummyVReg = GetDummyVReg(MF); + const TargetRegisterClass *DummyRC = + (DummyVReg == ~0U) ? 
nullptr : MRI.getRegClass(DummyVReg); + if (!DummyRC) return false; + + bbNames.push_back(MBB->getName()); + DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";); + + DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump();); + Changed |= rescheduleCanonically(MBB); + DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump();); + + std::vector<MachineInstr *> Candidates = populateCandidates(MBB); + std::vector<MachineInstr *> VisitedMIs; + std::copy(Candidates.begin(), Candidates.end(), + std::back_inserter(VisitedMIs)); + + std::vector<TypedVReg> VRegs; + for (auto candidate : Candidates) { + VRegs.push_back(TypedVReg(RSE_NewCandidate)); + + std::queue<TypedVReg> RegQueue; + + // Here we walk the vreg operands of a non-root node along our walk. + // The root nodes are the original candidates (stores normally). + // These are normally not the root nodes (except for the case of copies to + // physical registers). + for (unsigned i = 1; i < candidate->getNumOperands(); i++) { + if (candidate->mayStore() || candidate->isBranch()) + break; + + MachineOperand &MO = candidate->getOperand(i); + if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))) + continue; + + DEBUG(dbgs() << "Enqueue register"; MO.dump(); dbgs() << "\n";); + RegQueue.push(TypedVReg(MO.getReg())); + } + + // Here we walk the root candidates. We start from the 0th operand because + // the root is normally a store to a vreg. + for (unsigned i = 0; i < candidate->getNumOperands(); i++) { + + if (!candidate->mayStore() && !candidate->isBranch()) + break; + + MachineOperand &MO = candidate->getOperand(i); + + // TODO: Do we want to only add vregs here? + if (!MO.isReg() && !MO.isFI()) + continue; + + DEBUG(dbgs() << "Enqueue Reg/FI"; MO.dump(); dbgs() << "\n";); + + RegQueue.push(MO.isReg() ? TypedVReg(MO.getReg()) : + TypedVReg(RSE_FrameIndex)); + } + + doCandidateWalk(VRegs, RegQueue, VisitedMIs, MBB); + } + + // If we have populated no vregs to rename then bail. 
+ // The rest of this function does the vreg remaping. + if (VRegs.size() == 0) + return Changed; + + // Skip some vregs, so we can recon where we'll land next. + SkipVRegs(VRegGapIndex, MRI, DummyRC); + + auto VRegRenameMap = GetVRegRenameMap(VRegs, renamedInOtherBB, MRI, DummyRC); + Changed |= doVRegRenaming(renamedInOtherBB, VRegRenameMap, MRI); + Changed |= doDefKillClear(MBB); + + DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump(); dbgs() << "\n";); + DEBUG(dbgs() << "\n\n================================================\n\n"); + return Changed; +} + +bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) { + + static unsigned functionNum = 0; + if (CanonicalizeFunctionNumber != ~0U) { + if (CanonicalizeFunctionNumber != functionNum++) + return false; + DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName() << "\n";); + } + + // we need a valid vreg to create a vreg type for skipping all those + // stray vreg numbers so reach alignment/canonical vreg values. 
+ std::vector<MachineBasicBlock*> RPOList = GetRPOList(MF); + + DEBUG( + dbgs() << "\n\n NEW MACHINE FUNCTION: " << MF.getName() << " \n\n"; + dbgs() << "\n\n================================================\n\n"; + dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n"; + for (auto MBB : RPOList) { + dbgs() << MBB->getName() << "\n"; + } + dbgs() << "\n\n================================================\n\n"; + ); + + std::vector<StringRef> BBNames; + std::vector<unsigned> RenamedInOtherBB; + + unsigned GapIdx = 0; + unsigned BBNum = 0; + + bool Changed = false; + + for (auto MBB : RPOList) + Changed |= runOnBasicBlock(MBB, BBNames, RenamedInOtherBB, BBNum, GapIdx); + + return Changed; +} + diff --git a/lib/CodeGen/MIRParser/LLVMBuild.txt b/lib/CodeGen/MIRParser/LLVMBuild.txt index 2852124786e3..6269b005a985 100644 --- a/lib/CodeGen/MIRParser/LLVMBuild.txt +++ b/lib/CodeGen/MIRParser/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = MIRParser parent = CodeGen -required_libraries = AsmParser CodeGen Core MC Support Target +required_libraries = AsmParser BinaryFormat CodeGen Core MC Support Target diff --git a/lib/CodeGen/MIRParser/MILexer.cpp b/lib/CodeGen/MIRParser/MILexer.cpp index 58a655a4dee4..6adb7f1288d7 100644 --- a/lib/CodeGen/MIRParser/MILexer.cpp +++ b/lib/CodeGen/MIRParser/MILexer.cpp @@ -1,4 +1,4 @@ -//===- MILexer.cpp - Machine instructions lexer implementation ----------===// +//===- MILexer.cpp - Machine instructions lexer implementation ------------===// // // The LLVM Compiler Infrastructure // @@ -12,27 +12,33 @@ //===----------------------------------------------------------------------===// #include "MILexer.h" +#include "llvm/ADT/APSInt.h" #include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include <algorithm> +#include <cassert> #include <cctype> +#include <string> using namespace llvm; namespace { 
-typedef function_ref<void(StringRef::iterator Loc, const Twine &)> - ErrorCallbackType; +using ErrorCallbackType = + function_ref<void(StringRef::iterator Loc, const Twine &)>; /// This class provides a way to iterate and get characters from the source /// string. class Cursor { - const char *Ptr; - const char *End; + const char *Ptr = nullptr; + const char *End = nullptr; public: - Cursor(NoneType) : Ptr(nullptr), End(nullptr) {} + Cursor(NoneType) {} explicit Cursor(StringRef Str) { Ptr = Str.data(); @@ -202,14 +208,24 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("internal", MIToken::kw_internal) .Case("early-clobber", MIToken::kw_early_clobber) .Case("debug-use", MIToken::kw_debug_use) + .Case("renamable", MIToken::kw_renamable) .Case("tied-def", MIToken::kw_tied_def) .Case("frame-setup", MIToken::kw_frame_setup) .Case("debug-location", MIToken::kw_debug_location) .Case("same_value", MIToken::kw_cfi_same_value) .Case("offset", MIToken::kw_cfi_offset) + .Case("rel_offset", MIToken::kw_cfi_rel_offset) .Case("def_cfa_register", MIToken::kw_cfi_def_cfa_register) .Case("def_cfa_offset", MIToken::kw_cfi_def_cfa_offset) + .Case("adjust_cfa_offset", MIToken::kw_cfi_adjust_cfa_offset) + .Case("escape", MIToken::kw_cfi_escape) .Case("def_cfa", MIToken::kw_cfi_def_cfa) + .Case("remember_state", MIToken::kw_cfi_remember_state) + .Case("restore", MIToken::kw_cfi_restore) + .Case("restore_state", MIToken::kw_cfi_restore_state) + .Case("undefined", MIToken::kw_cfi_undefined) + .Case("register", MIToken::kw_cfi_register) + .Case("window_save", MIToken::kw_cfi_window_save) .Case("blockaddress", MIToken::kw_blockaddress) .Case("intrinsic", MIToken::kw_intrinsic) .Case("target-index", MIToken::kw_target_index) @@ -270,6 +286,9 @@ static Cursor maybeLexMachineBasicBlock(Cursor C, MIToken &Token, C.advance(); StringRef Number = NumberRange.upto(C); unsigned StringOffset = PrefixLength + Number.size(); // Drop '%bb.<id>' + // TODO: The format 
bb.<id>.<irname> is supported only when it's not a + // reference. Once we deprecate the format where the irname shows up, we + // should only lex forward if it is a reference. if (C.peek() == '.') { C.advance(); // Skip '.' ++StringOffset; @@ -490,6 +509,7 @@ static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) { .Case("!alias.scope", MIToken::md_alias_scope) .Case("!noalias", MIToken::md_noalias) .Case("!range", MIToken::md_range) + .Case("!DIExpression", MIToken::md_diexpr) .Default(MIToken::Error); } diff --git a/lib/CodeGen/MIRParser/MILexer.h b/lib/CodeGen/MIRParser/MILexer.h index 08b82e59c4fc..0204d549d5d4 100644 --- a/lib/CodeGen/MIRParser/MILexer.h +++ b/lib/CodeGen/MIRParser/MILexer.h @@ -1,4 +1,4 @@ -//===- MILexer.h - Lexer for machine instructions -------------------------===// +//===- MILexer.h - Lexer for machine instructions ---------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -18,7 +18,7 @@ #include "llvm/ADT/APSInt.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" -#include <functional> +#include <string> namespace llvm { @@ -60,14 +60,24 @@ struct MIToken { kw_internal, kw_early_clobber, kw_debug_use, + kw_renamable, kw_tied_def, kw_frame_setup, kw_debug_location, kw_cfi_same_value, kw_cfi_offset, + kw_cfi_rel_offset, kw_cfi_def_cfa_register, kw_cfi_def_cfa_offset, + kw_cfi_adjust_cfa_offset, + kw_cfi_escape, kw_cfi_def_cfa, + kw_cfi_register, + kw_cfi_remember_state, + kw_cfi_restore, + kw_cfi_restore_state, + kw_cfi_undefined, + kw_cfi_window_save, kw_blockaddress, kw_intrinsic, kw_target_index, @@ -100,6 +110,7 @@ struct MIToken { md_alias_scope, md_noalias, md_range, + md_diexpr, // Identifier tokens Identifier, @@ -132,14 +143,14 @@ struct MIToken { }; private: - TokenKind Kind; + TokenKind Kind = Error; StringRef Range; StringRef StringValue; std::string StringValueStorage; APSInt IntVal; public: - MIToken() : Kind(Error) {} + MIToken() = default; MIToken &reset(TokenKind Kind, 
StringRef Range); @@ -164,7 +175,8 @@ public: return Kind == kw_implicit || Kind == kw_implicit_define || Kind == kw_def || Kind == kw_dead || Kind == kw_killed || Kind == kw_undef || Kind == kw_internal || - Kind == kw_early_clobber || Kind == kw_debug_use; + Kind == kw_early_clobber || Kind == kw_debug_use || + Kind == kw_renamable; } bool isMemoryOperandFlag() const { @@ -203,4 +215,4 @@ StringRef lexMIToken( } // end namespace llvm -#endif +#endif // LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp index c68d87b15a31..1a78ae3aad07 100644 --- a/lib/CodeGen/MIRParser/MIParser.cpp +++ b/lib/CodeGen/MIRParser/MIParser.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "MILexer.h" #include "MIParser.h" +#include "MILexer.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/APSInt.h" #include "llvm/ADT/ArrayRef.h" @@ -21,8 +21,8 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include "llvm/AsmParser/Parser.h" #include "llvm/AsmParser/SlotMapping.h" @@ -33,12 +33,15 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" @@ -63,11 +66,8 @@ #include "llvm/Support/SMLoc.h" #include 
"llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cassert> #include <cctype> @@ -209,9 +209,11 @@ public: bool parseJumpTableIndexOperand(MachineOperand &Dest); bool parseExternalSymbolOperand(MachineOperand &Dest); bool parseMDNode(MDNode *&Node); + bool parseDIExpression(MDNode *&Node); bool parseMetadataOperand(MachineOperand &Dest); bool parseCFIOffset(int &Offset); bool parseCFIRegister(unsigned &Reg); + bool parseCFIEscapeValues(std::string& Values); bool parseCFIOperand(MachineOperand &Dest); bool parseIRBlock(BasicBlock *&BB, const Function &F); bool parseBlockAddressOperand(MachineOperand &Dest); @@ -429,7 +431,7 @@ bool MIParser::parseBasicBlockDefinition( break; case MIToken::IRBlock: // TODO: Report an error when both name and ir block are specified. 
- if (parseIRBlock(BB, *MF.getFunction())) + if (parseIRBlock(BB, MF.getFunction())) return true; lex(); break; @@ -445,7 +447,7 @@ bool MIParser::parseBasicBlockDefinition( if (!Name.empty()) { BB = dyn_cast_or_null<BasicBlock>( - MF.getFunction()->getValueSymbolTable()->lookup(Name)); + MF.getFunction().getValueSymbolTable()->lookup(Name)); if (!BB) return error(Loc, Twine("basic block '") + Name + "' is not defined in the function '" + @@ -854,10 +856,14 @@ bool MIParser::parseStandaloneStackObject(int &FI) { bool MIParser::parseStandaloneMDNode(MDNode *&Node) { lex(); - if (Token.isNot(MIToken::exclaim)) + if (Token.is(MIToken::exclaim)) { + if (parseMDNode(Node)) + return true; + } else if (Token.is(MIToken::md_diexpr)) { + if (parseDIExpression(Node)) + return true; + } else return error("expected a metadata node"); - if (parseMDNode(Node)) - return true; if (Token.isNot(MIToken::Eof)) return error("expected end of string after the metadata node"); return false; @@ -1054,6 +1060,9 @@ bool MIParser::parseRegisterFlag(unsigned &Flags) { case MIToken::kw_debug_use: Flags |= RegState::Debug; break; + case MIToken::kw_renamable: + Flags |= RegState::Renamable; + break; default: llvm_unreachable("The current token should be a register flag"); } @@ -1206,7 +1215,8 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest, Reg, Flags & RegState::Define, Flags & RegState::Implicit, Flags & RegState::Kill, Flags & RegState::Dead, Flags & RegState::Undef, Flags & RegState::EarlyClobber, SubReg, Flags & RegState::Debug, - Flags & RegState::InternalRead); + Flags & RegState::InternalRead, Flags & RegState::Renamable); + return false; } @@ -1224,7 +1234,7 @@ bool MIParser::parseIRConstant(StringRef::iterator Loc, StringRef StringValue, const Constant *&C) { auto Source = StringValue.str(); // The source has to be null terminated. 
SMDiagnostic Err; - C = parseConstantValue(Source, Err, *MF.getFunction()->getParent(), + C = parseConstantValue(Source, Err, *MF.getFunction().getParent(), &PFS.IRSlots); if (!C) return error(Loc + Err.getColumnNo(), Err.getMessage()); @@ -1244,7 +1254,7 @@ bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) { lex(); return false; } else if (Token.is(MIToken::PointerType)) { - const DataLayout &DL = MF.getFunction()->getParent()->getDataLayout(); + const DataLayout &DL = MF.getDataLayout(); unsigned AS = APSInt(Token.range().drop_front()).getZExtValue(); Ty = LLT::pointer(AS, DL.getPointerSizeInBits(AS)); lex(); @@ -1338,6 +1348,8 @@ bool MIParser::parseMBBReference(MachineBasicBlock *&MBB) { return error(Twine("use of undefined machine basic block #") + Twine(Number)); MBB = MBBInfo->second; + // TODO: Only parse the name if it's a MachineBasicBlockLabel. Deprecate once + // we drop the <irname> from the bb.<id>.<irname> format. if (!Token.stringValue().empty() && Token.stringValue() != MBB->getName()) return error(Twine("the name of machine basic block #") + Twine(Number) + " isn't '" + Token.stringValue() + "'"); @@ -1407,7 +1419,7 @@ bool MIParser::parseFixedStackObjectOperand(MachineOperand &Dest) { bool MIParser::parseGlobalValue(GlobalValue *&GV) { switch (Token.kind()) { case MIToken::NamedGlobalValue: { - const Module *M = MF.getFunction()->getParent(); + const Module *M = MF.getFunction().getParent(); GV = M->getNamedValue(Token.stringValue()); if (!GV) return error(Twine("use of undefined global value '") + Token.range() + @@ -1492,6 +1504,7 @@ bool MIParser::parseSubRegisterIndexOperand(MachineOperand &Dest) { bool MIParser::parseMDNode(MDNode *&Node) { assert(Token.is(MIToken::exclaim)); + auto Loc = Token.location(); lex(); if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned()) @@ -1507,10 +1520,56 @@ bool MIParser::parseMDNode(MDNode *&Node) { return false; } +bool MIParser::parseDIExpression(MDNode *&Expr) { + 
assert(Token.is(MIToken::md_diexpr)); + lex(); + + // FIXME: Share this parsing with the IL parser. + SmallVector<uint64_t, 8> Elements; + + if (expectAndConsume(MIToken::lparen)) + return true; + + if (Token.isNot(MIToken::rparen)) { + do { + if (Token.is(MIToken::Identifier)) { + if (unsigned Op = dwarf::getOperationEncoding(Token.stringValue())) { + lex(); + Elements.push_back(Op); + continue; + } + return error(Twine("invalid DWARF op '") + Token.stringValue() + "'"); + } + + if (Token.isNot(MIToken::IntegerLiteral) || + Token.integerValue().isSigned()) + return error("expected unsigned integer"); + + auto &U = Token.integerValue(); + if (U.ugt(UINT64_MAX)) + return error("element too large, limit is " + Twine(UINT64_MAX)); + Elements.push_back(U.getZExtValue()); + lex(); + + } while (consumeIfPresent(MIToken::comma)); + } + + if (expectAndConsume(MIToken::rparen)) + return true; + + Expr = DIExpression::get(MF.getFunction().getContext(), Elements); + return false; +} + bool MIParser::parseMetadataOperand(MachineOperand &Dest) { MDNode *Node = nullptr; - if (parseMDNode(Node)) - return true; + if (Token.is(MIToken::exclaim)) { + if (parseMDNode(Node)) + return true; + } else if (Token.is(MIToken::md_diexpr)) { + if (parseDIExpression(Node)) + return true; + } Dest = MachineOperand::CreateMetadata(Node); return false; } @@ -1541,6 +1600,21 @@ bool MIParser::parseCFIRegister(unsigned &Reg) { return false; } +bool MIParser::parseCFIEscapeValues(std::string &Values) { + do { + if (Token.isNot(MIToken::HexLiteral)) + return error("expected a hexadecimal literal"); + unsigned Value; + if (getUnsigned(Value)) + return true; + if (Value > UINT8_MAX) + return error("expected a 8-bit integer (too large)"); + Values.push_back(static_cast<uint8_t>(Value)); + lex(); + } while (consumeIfPresent(MIToken::comma)); + return false; +} + bool MIParser::parseCFIOperand(MachineOperand &Dest) { auto Kind = Token.kind(); lex(); @@ -1560,6 +1634,13 @@ bool 
MIParser::parseCFIOperand(MachineOperand &Dest) { CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, Reg, Offset)); break; + case MIToken::kw_cfi_rel_offset: + if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) || + parseCFIOffset(Offset)) + return true; + CFIIndex = MF.addFrameInst( + MCCFIInstruction::createRelOffset(nullptr, Reg, Offset)); + break; case MIToken::kw_cfi_def_cfa_register: if (parseCFIRegister(Reg)) return true; @@ -1573,6 +1654,12 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) { CFIIndex = MF.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, -Offset)); break; + case MIToken::kw_cfi_adjust_cfa_offset: + if (parseCFIOffset(Offset)) + return true; + CFIIndex = MF.addFrameInst( + MCCFIInstruction::createAdjustCfaOffset(nullptr, Offset)); + break; case MIToken::kw_cfi_def_cfa: if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) || parseCFIOffset(Offset)) @@ -1581,6 +1668,42 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) { CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(nullptr, Reg, -Offset)); break; + case MIToken::kw_cfi_remember_state: + CFIIndex = MF.addFrameInst(MCCFIInstruction::createRememberState(nullptr)); + break; + case MIToken::kw_cfi_restore: + if (parseCFIRegister(Reg)) + return true; + CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, Reg)); + break; + case MIToken::kw_cfi_restore_state: + CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestoreState(nullptr)); + break; + case MIToken::kw_cfi_undefined: + if (parseCFIRegister(Reg)) + return true; + CFIIndex = MF.addFrameInst(MCCFIInstruction::createUndefined(nullptr, Reg)); + break; + case MIToken::kw_cfi_register: { + unsigned Reg2; + if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) || + parseCFIRegister(Reg2)) + return true; + + CFIIndex = + MF.addFrameInst(MCCFIInstruction::createRegister(nullptr, Reg, Reg2)); + break; + } + case MIToken::kw_cfi_window_save: + CFIIndex = 
MF.addFrameInst(MCCFIInstruction::createWindowSave(nullptr)); + break; + case MIToken::kw_cfi_escape: { + std::string Values; + if (parseCFIEscapeValues(Values)) + return true; + CFIIndex = MF.addFrameInst(MCCFIInstruction::createEscape(nullptr, Values)); + break; + } default: // TODO: Parse the other CFI operands. llvm_unreachable("The current token should be a cfi operand"); @@ -1819,6 +1942,7 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest, case MIToken::kw_internal: case MIToken::kw_early_clobber: case MIToken::kw_debug_use: + case MIToken::kw_renamable: case MIToken::underscore: case MIToken::NamedRegister: case MIToken::VirtualRegister: @@ -1851,13 +1975,23 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest, return parseExternalSymbolOperand(Dest); case MIToken::SubRegisterIndex: return parseSubRegisterIndexOperand(Dest); + case MIToken::md_diexpr: case MIToken::exclaim: return parseMetadataOperand(Dest); case MIToken::kw_cfi_same_value: case MIToken::kw_cfi_offset: + case MIToken::kw_cfi_rel_offset: case MIToken::kw_cfi_def_cfa_register: case MIToken::kw_cfi_def_cfa_offset: + case MIToken::kw_cfi_adjust_cfa_offset: + case MIToken::kw_cfi_escape: case MIToken::kw_cfi_def_cfa: + case MIToken::kw_cfi_register: + case MIToken::kw_cfi_remember_state: + case MIToken::kw_cfi_restore: + case MIToken::kw_cfi_restore_state: + case MIToken::kw_cfi_undefined: + case MIToken::kw_cfi_window_save: return parseCFIOperand(Dest); case MIToken::kw_blockaddress: return parseBlockAddressOperand(Dest); @@ -1968,7 +2102,7 @@ bool MIParser::parseOperandsOffset(MachineOperand &Op) { bool MIParser::parseIRValue(const Value *&V) { switch (Token.kind()) { case MIToken::NamedIRValue: { - V = MF.getFunction()->getValueSymbolTable()->lookup(Token.stringValue()); + V = MF.getFunction().getValueSymbolTable()->lookup(Token.stringValue()); break; } case MIToken::IRValue: { @@ -2029,8 +2163,11 @@ bool MIParser::getHexUint(APInt &Result) { return true; StringRef V = 
S.substr(2); APInt A(V.size()*4, V, 16); - Result = APInt(A.getActiveBits(), - ArrayRef<uint64_t>(A.getRawData(), A.getNumWords())); + + // If A is 0, then A.getActiveBits() is 0. This isn't a valid bitwidth. Make + // sure it isn't the case before constructing result. + unsigned NumBits = (A == 0) ? 32 : A.getActiveBits(); + Result = APInt(NumBits, ArrayRef<uint64_t>(A.getRawData(), A.getNumWords())); return false; } @@ -2216,9 +2353,15 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { Flags |= MachineMemOperand::MOStore; lex(); + // Optional 'store' for operands that both load and store. + if (Token.is(MIToken::Identifier) && Token.stringValue() == "store") { + Flags |= MachineMemOperand::MOStore; + lex(); + } + // Optional synchronization scope. SyncScope::ID SSID; - if (parseOptionalScope(MF.getFunction()->getContext(), SSID)) + if (parseOptionalScope(MF.getFunction().getContext(), SSID)) return true; // Up to two atomic orderings (cmpxchg provides guarantees on failure). @@ -2238,7 +2381,11 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { MachinePointerInfo Ptr = MachinePointerInfo(); if (Token.is(MIToken::Identifier)) { - const char *Word = Flags & MachineMemOperand::MOLoad ? "from" : "into"; + const char *Word = + ((Flags & MachineMemOperand::MOLoad) && + (Flags & MachineMemOperand::MOStore)) + ? "on" + : Flags & MachineMemOperand::MOLoad ? 
"from" : "into"; if (Token.stringValue() != Word) return error(Twine("expected '") + Word + "'"); lex(); @@ -2395,12 +2542,12 @@ static const BasicBlock *getIRBlockFromSlot( const BasicBlock *MIParser::getIRBlock(unsigned Slot) { if (Slots2BasicBlocks.empty()) - initSlots2BasicBlocks(*MF.getFunction(), Slots2BasicBlocks); + initSlots2BasicBlocks(MF.getFunction(), Slots2BasicBlocks); return getIRBlockFromSlot(Slot, Slots2BasicBlocks); } const BasicBlock *MIParser::getIRBlock(unsigned Slot, const Function &F) { - if (&F == MF.getFunction()) + if (&F == &MF.getFunction()) return getIRBlock(Slot); DenseMap<unsigned, const BasicBlock *> CustomSlots2BasicBlocks; initSlots2BasicBlocks(F, CustomSlots2BasicBlocks); @@ -2431,7 +2578,7 @@ static void initSlots2Values(const Function &F, const Value *MIParser::getIRValue(unsigned Slot) { if (Slots2Values.empty()) - initSlots2Values(*MF.getFunction(), Slots2Values); + initSlots2Values(MF.getFunction(), Slots2Values); auto ValueInfo = Slots2Values.find(Slot); if (ValueInfo == Slots2Values.end()) return nullptr; diff --git a/lib/CodeGen/MIRParser/MIRParser.cpp b/lib/CodeGen/MIRParser/MIRParser.cpp index 78b57f357781..7d8e62736a34 100644 --- a/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/lib/CodeGen/MIRParser/MIRParser.cpp @@ -120,7 +120,7 @@ public: bool parseCalleeSavedRegister(PerFunctionMIParsingState &PFS, std::vector<CalleeSavedInfo> &CSIInfo, const yaml::StringValue &RegisterSource, - int FrameIdx); + bool IsRestored, int FrameIdx); bool parseStackObjectsDebugInfo(PerFunctionMIParsingState &PFS, const yaml::MachineStackObject &Object, @@ -214,6 +214,9 @@ void MIRParserImpl::reportDiagnostic(const SMDiagnostic &Diag) { case SourceMgr::DK_Note: Kind = DS_Note; break; + case SourceMgr::DK_Remark: + llvm_unreachable("remark unexpected"); + break; } Context.diagnose(DiagnosticInfoMIRParser(Kind, Diag)); } @@ -438,6 +441,7 @@ bool MIRParserImpl::parseRegisterInfo(PerFunctionMIParsingState &PFS, if 
(StringRef(VReg.Class.Value).equals("_")) { Info.Kind = VRegInfo::GENERIC; + Info.D.RegBank = nullptr; } else { const auto *RC = getRegClass(MF, VReg.Class.Value); if (RC) { @@ -547,7 +551,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS, const yaml::MachineFunction &YamlMF) { MachineFunction &MF = PFS.MF; MachineFrameInfo &MFI = MF.getFrameInfo(); - const Function &F = *MF.getFunction(); + const Function &F = MF.getFunction(); const yaml::MachineFrameInfo &YamlMFI = YamlMF.FrameInfo; MFI.setFrameAddressIsTaken(YamlMFI.IsFrameAddressTaken); MFI.setReturnAddressIsTaken(YamlMFI.IsReturnAddressTaken); @@ -587,6 +591,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS, else ObjectIdx = MFI.CreateFixedSpillStackObject(Object.Size, Object.Offset); MFI.setObjectAlignment(ObjectIdx, Object.Alignment); + MFI.setStackID(ObjectIdx, Object.StackID); if (!PFS.FixedStackObjectSlots.insert(std::make_pair(Object.ID.Value, ObjectIdx)) .second) @@ -594,7 +599,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS, Twine("redefinition of fixed stack object '%fixed-stack.") + Twine(Object.ID.Value) + "'"); if (parseCalleeSavedRegister(PFS, CSIInfo, Object.CalleeSavedRegister, - ObjectIdx)) + Object.CalleeSavedRestored, ObjectIdx)) return true; } @@ -619,13 +624,15 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS, Object.Size, Object.Alignment, Object.Type == yaml::MachineStackObject::SpillSlot, Alloca); MFI.setObjectOffset(ObjectIdx, Object.Offset); + MFI.setStackID(ObjectIdx, Object.StackID); + if (!PFS.StackObjectSlots.insert(std::make_pair(Object.ID.Value, ObjectIdx)) .second) return error(Object.ID.SourceRange.Start, Twine("redefinition of stack object '%stack.") + Twine(Object.ID.Value) + "'"); if (parseCalleeSavedRegister(PFS, CSIInfo, Object.CalleeSavedRegister, - ObjectIdx)) + Object.CalleeSavedRestored, ObjectIdx)) return true; if (Object.LocalOffset) 
MFI.mapLocalFrameObject(ObjectIdx, Object.LocalOffset.getValue()); @@ -650,14 +657,16 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS, bool MIRParserImpl::parseCalleeSavedRegister(PerFunctionMIParsingState &PFS, std::vector<CalleeSavedInfo> &CSIInfo, - const yaml::StringValue &RegisterSource, int FrameIdx) { + const yaml::StringValue &RegisterSource, bool IsRestored, int FrameIdx) { if (RegisterSource.Value.empty()) return false; unsigned Reg = 0; SMDiagnostic Error; if (parseNamedRegisterReference(PFS, Reg, RegisterSource.Value, Error)) return error(Error, RegisterSource.SourceRange); - CSIInfo.push_back(CalleeSavedInfo(Reg, FrameIdx)); + CalleeSavedInfo CSI(Reg, FrameIdx); + CSI.setRestored(IsRestored); + CSIInfo.push_back(CSI); return false; } @@ -713,9 +722,13 @@ bool MIRParserImpl::initializeConstantPool(PerFunctionMIParsingState &PFS, MachineConstantPool &ConstantPool, const yaml::MachineFunction &YamlMF) { DenseMap<unsigned, unsigned> &ConstantPoolSlots = PFS.ConstantPoolSlots; const MachineFunction &MF = PFS.MF; - const auto &M = *MF.getFunction()->getParent(); + const auto &M = *MF.getFunction().getParent(); SMDiagnostic Error; for (const auto &YamlConstant : YamlMF.Constants) { + if (YamlConstant.IsTargetSpecific) + // FIXME: Support target-specific constant pools + return error(YamlConstant.Value.SourceRange.Start, + "Can't parse target-specific constant pool entries yet"); const Constant *Value = dyn_cast_or_null<Constant>( parseConstantValue(YamlConstant.Value.Value, Error, M)); if (!Value) diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp index ddeacf1d1bfb..3568f96d2b9a 100644 --- a/lib/CodeGen/MIRPrinter.cpp +++ b/lib/CodeGen/MIRPrinter.cpp @@ -12,16 +12,18 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/MIRPrinter.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallBitVector.h" 
#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" +#include "llvm/CodeGen/MIRYamlMapping.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -31,19 +33,20 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MIRPrinter.h" -#include "llvm/CodeGen/MIRYamlMapping.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSlotTracker.h" #include "llvm/IR/Value.h" @@ -57,13 +60,10 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/LowLevelTypeImpl.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/YAMLTraits.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cassert> #include <cinttypes> @@ -75,7 +75,8 @@ using namespace llvm; -static cl::opt<bool> SimplifyMIR("simplify-mir", +static cl::opt<bool> SimplifyMIR( + "simplify-mir", cl::Hidden, cl::desc("Leave out 
unnecessary information when printing MIR")); namespace { @@ -156,15 +157,13 @@ public: void print(const MachineBasicBlock &MBB); void print(const MachineInstr &MI); - void printMBBReference(const MachineBasicBlock &MBB); void printIRBlockReference(const BasicBlock &BB); void printIRValueReference(const Value &V); void printStackObjectReference(int FrameIndex); void printOffset(int64_t Offset); - void printTargetFlags(const MachineOperand &Op); - void print(const MachineOperand &Op, const TargetRegisterInfo *TRI, - unsigned I, bool ShouldPrintRegisterTies, - LLT TypeToPrint, bool IsDef = false); + void print(const MachineInstr &MI, unsigned OpIdx, + const TargetRegisterInfo *TRI, bool ShouldPrintRegisterTies, + LLT TypeToPrint, bool PrintDef = true); void print(const LLVMContext &Context, const TargetInstrInfo &TII, const MachineMemOperand &Op); void printSyncScope(const LLVMContext &Context, SyncScope::ID SSID); @@ -192,23 +191,10 @@ template <> struct BlockScalarTraits<Module> { } // end namespace yaml } // end namespace llvm -static void printReg(unsigned Reg, raw_ostream &OS, - const TargetRegisterInfo *TRI) { - // TODO: Print Stack Slots. 
- if (!Reg) - OS << '_'; - else if (TargetRegisterInfo::isVirtualRegister(Reg)) - OS << '%' << TargetRegisterInfo::virtReg2Index(Reg); - else if (Reg < TRI->getNumRegs()) - OS << '%' << StringRef(TRI->getName(Reg)).lower(); - else - llvm_unreachable("Can't print this kind of register yet"); -} - -static void printReg(unsigned Reg, yaml::StringValue &Dest, - const TargetRegisterInfo *TRI) { +static void printRegMIR(unsigned Reg, yaml::StringValue &Dest, + const TargetRegisterInfo *TRI) { raw_string_ostream OS(Dest.Value); - printReg(Reg, OS, TRI); + OS << printReg(Reg, TRI); } void MIRPrinter::print(const MachineFunction &MF) { @@ -227,8 +213,8 @@ void MIRPrinter::print(const MachineFunction &MF) { MachineFunctionProperties::Property::Selected); convert(YamlMF, MF.getRegInfo(), MF.getSubtarget().getRegisterInfo()); - ModuleSlotTracker MST(MF.getFunction()->getParent()); - MST.incorporateFunction(*MF.getFunction()); + ModuleSlotTracker MST(MF.getFunction().getParent()); + MST.incorporateFunction(MF.getFunction()); convert(MST, YamlMF.FrameInfo, MF.getFrameInfo()); convertStackObjects(YamlMF, MF, MST); if (const auto *ConstantPool = MF.getConstantPool()) @@ -262,7 +248,7 @@ static void printCustomRegMask(const uint32_t *RegMask, raw_ostream &OS, if (RegMask[I / 32] & (1u << (I % 32))) { if (IsRegInRegMaskFound) OS << ','; - printReg(I, OS, TRI); + OS << printReg(I, TRI); IsRegInRegMaskFound = true; } } @@ -270,6 +256,14 @@ static void printCustomRegMask(const uint32_t *RegMask, raw_ostream &OS, OS << ')'; } +static void printRegClassOrBank(unsigned Reg, yaml::StringValue &Dest, + const MachineRegisterInfo &RegInfo, + const TargetRegisterInfo *TRI) { + raw_string_ostream OS(Dest.Value); + OS << printRegClassOrBank(Reg, RegInfo, TRI); +} + + void MIRPrinter::convert(yaml::MachineFunction &MF, const MachineRegisterInfo &RegInfo, const TargetRegisterInfo *TRI) { @@ -280,28 +274,19 @@ void MIRPrinter::convert(yaml::MachineFunction &MF, unsigned Reg = 
TargetRegisterInfo::index2VirtReg(I); yaml::VirtualRegisterDefinition VReg; VReg.ID = I; - if (RegInfo.getRegClassOrNull(Reg)) - VReg.Class = - StringRef(TRI->getRegClassName(RegInfo.getRegClass(Reg))).lower(); - else if (RegInfo.getRegBankOrNull(Reg)) - VReg.Class = StringRef(RegInfo.getRegBankOrNull(Reg)->getName()).lower(); - else { - VReg.Class = std::string("_"); - assert((RegInfo.def_empty(Reg) || RegInfo.getType(Reg).isValid()) && - "Generic registers must have a valid type"); - } + ::printRegClassOrBank(Reg, VReg.Class, RegInfo, TRI); unsigned PreferredReg = RegInfo.getSimpleHint(Reg); if (PreferredReg) - printReg(PreferredReg, VReg.PreferredRegister, TRI); + printRegMIR(PreferredReg, VReg.PreferredRegister, TRI); MF.VirtualRegisters.push_back(VReg); } // Print the live ins. - for (auto I = RegInfo.livein_begin(), E = RegInfo.livein_end(); I != E; ++I) { + for (std::pair<unsigned, unsigned> LI : RegInfo.liveins()) { yaml::MachineFunctionLiveIn LiveIn; - printReg(I->first, LiveIn.Register, TRI); - if (I->second) - printReg(I->second, LiveIn.VirtualRegister, TRI); + printRegMIR(LI.first, LiveIn.Register, TRI); + if (LI.second) + printRegMIR(LI.second, LiveIn.VirtualRegister, TRI); MF.LiveIns.push_back(LiveIn); } @@ -311,7 +296,7 @@ void MIRPrinter::convert(yaml::MachineFunction &MF, std::vector<yaml::FlowStringValue> CalleeSavedRegisters; for (const MCPhysReg *I = CalleeSavedRegs; *I; ++I) { yaml::FlowStringValue Reg; - printReg(*I, Reg, TRI); + printRegMIR(*I, Reg, TRI); CalleeSavedRegisters.push_back(Reg); } MF.CalleeSavedRegisters = CalleeSavedRegisters; @@ -337,13 +322,11 @@ void MIRPrinter::convert(ModuleSlotTracker &MST, YamlMFI.HasMustTailInVarArgFunc = MFI.hasMustTailInVarArgFunc(); if (MFI.getSavePoint()) { raw_string_ostream StrOS(YamlMFI.SavePoint.Value); - MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping) - .printMBBReference(*MFI.getSavePoint()); + StrOS << printMBBReference(*MFI.getSavePoint()); } if (MFI.getRestorePoint()) { 
raw_string_ostream StrOS(YamlMFI.RestorePoint.Value); - MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping) - .printMBBReference(*MFI.getRestorePoint()); + StrOS << printMBBReference(*MFI.getRestorePoint()); } } @@ -366,6 +349,7 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF, YamlObject.Offset = MFI.getObjectOffset(I); YamlObject.Size = MFI.getObjectSize(I); YamlObject.Alignment = MFI.getObjectAlignment(I); + YamlObject.StackID = MFI.getStackID(I); YamlObject.IsImmutable = MFI.isImmutableObjectIndex(I); YamlObject.IsAliased = MFI.isAliasedObjectIndex(I); YMF.FixedStackObjects.push_back(YamlObject); @@ -392,6 +376,7 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF, YamlObject.Offset = MFI.getObjectOffset(I); YamlObject.Size = MFI.getObjectSize(I); YamlObject.Alignment = MFI.getObjectAlignment(I); + YamlObject.StackID = MFI.getStackID(I); YMF.StackObjects.push_back(YamlObject); StackObjectOperandMapping.insert(std::make_pair( @@ -400,15 +385,20 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF, for (const auto &CSInfo : MFI.getCalleeSavedInfo()) { yaml::StringValue Reg; - printReg(CSInfo.getReg(), Reg, TRI); + printRegMIR(CSInfo.getReg(), Reg, TRI); auto StackObjectInfo = StackObjectOperandMapping.find(CSInfo.getFrameIdx()); assert(StackObjectInfo != StackObjectOperandMapping.end() && "Invalid stack object index"); const FrameIndexOperand &StackObject = StackObjectInfo->second; - if (StackObject.IsFixed) + if (StackObject.IsFixed) { YMF.FixedStackObjects[StackObject.ID].CalleeSavedRegister = Reg; - else + YMF.FixedStackObjects[StackObject.ID].CalleeSavedRestored = + CSInfo.isRestored(); + } else { YMF.StackObjects[StackObject.ID].CalleeSavedRegister = Reg; + YMF.StackObjects[StackObject.ID].CalleeSavedRestored = + CSInfo.isRestored(); + } } for (unsigned I = 0, E = MFI.getLocalFrameObjectCount(); I < E; ++I) { auto LocalObject = MFI.getLocalFrameObjectMap(I); @@ -456,17 +446,20 @@ void 
MIRPrinter::convert(yaml::MachineFunction &MF, const MachineConstantPool &ConstantPool) { unsigned ID = 0; for (const MachineConstantPoolEntry &Constant : ConstantPool.getConstants()) { - // TODO: Serialize target specific constant pool entries. - if (Constant.isMachineConstantPoolEntry()) - llvm_unreachable("Can't print target specific constant pool entries yet"); - - yaml::MachineConstantPoolValue YamlConstant; std::string Str; raw_string_ostream StrOS(Str); - Constant.Val.ConstVal->printAsOperand(StrOS); + if (Constant.isMachineConstantPoolEntry()) { + Constant.Val.MachineCPVal->print(StrOS); + } else { + Constant.Val.ConstVal->printAsOperand(StrOS); + } + + yaml::MachineConstantPoolValue YamlConstant; YamlConstant.ID = ID++; YamlConstant.Value = StrOS.str(); YamlConstant.Alignment = Constant.getAlignment(); + YamlConstant.IsTargetSpecific = Constant.isMachineConstantPoolEntry(); + MF.Constants.push_back(YamlConstant); } } @@ -482,8 +475,7 @@ void MIRPrinter::convert(ModuleSlotTracker &MST, Entry.ID = ID++; for (const auto *MBB : Table.MBBs) { raw_string_ostream StrOS(Str); - MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping) - .printMBBReference(*MBB); + StrOS << printMBBReference(*MBB); Entry.Blocks.push_back(StrOS.str()); Str.clear(); } @@ -593,13 +585,19 @@ void MIPrinter::print(const MachineBasicBlock &MBB) { bool HasLineAttributes = false; // Print the successors bool canPredictProbs = canPredictBranchProbabilities(MBB); - if (!MBB.succ_empty() && (!SimplifyMIR || !canPredictProbs || - !canPredictSuccessors(MBB))) { + // Even if the list of successors is empty, if we cannot guess it, + // we need to print it to tell the parser that the list is empty. + // This is needed, because MI model unreachable as empty blocks + // with an empty successor list. If the parser would see that + // without the successor list, it would guess the code would + // fallthrough. 
+ if ((!MBB.succ_empty() && !SimplifyMIR) || !canPredictProbs || + !canPredictSuccessors(MBB)) { OS.indent(2) << "successors: "; for (auto I = MBB.succ_begin(), E = MBB.succ_end(); I != E; ++I) { if (I != MBB.succ_begin()) OS << ", "; - printMBBReference(**I); + OS << printMBBReference(**I); if (!SimplifyMIR || !canPredictProbs) OS << '(' << format("0x%08" PRIx32, MBB.getSuccProbability(I).getNumerator()) @@ -619,7 +617,7 @@ void MIPrinter::print(const MachineBasicBlock &MBB) { if (!First) OS << ", "; First = false; - printReg(LI.PhysReg, OS, &TRI); + OS << printReg(LI.PhysReg, &TRI); if (!LI.LaneMask.all()) OS << ":0x" << PrintLaneMask(LI.LaneMask); } @@ -648,46 +646,8 @@ void MIPrinter::print(const MachineBasicBlock &MBB) { OS.indent(2) << "}\n"; } -/// Return true when an instruction has tied register that can't be determined -/// by the instruction's descriptor. -static bool hasComplexRegisterTies(const MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) { - const auto &Operand = MI.getOperand(I); - if (!Operand.isReg() || Operand.isDef()) - // Ignore the defined registers as MCID marks only the uses as tied. - continue; - int ExpectedTiedIdx = MCID.getOperandConstraint(I, MCOI::TIED_TO); - int TiedIdx = Operand.isTied() ? 
int(MI.findTiedOperandIdx(I)) : -1; - if (ExpectedTiedIdx != TiedIdx) - return true; - } - return false; -} - -static LLT getTypeToPrint(const MachineInstr &MI, unsigned OpIdx, - SmallBitVector &PrintedTypes, - const MachineRegisterInfo &MRI) { - const MachineOperand &Op = MI.getOperand(OpIdx); - if (!Op.isReg()) - return LLT{}; - - if (MI.isVariadic() || OpIdx >= MI.getNumExplicitOperands()) - return MRI.getType(Op.getReg()); - - auto &OpInfo = MI.getDesc().OpInfo[OpIdx]; - if (!OpInfo.isGenericType()) - return MRI.getType(Op.getReg()); - - if (PrintedTypes[OpInfo.getGenericTypeIndex()]) - return LLT{}; - - PrintedTypes.set(OpInfo.getGenericTypeIndex()); - return MRI.getType(Op.getReg()); -} - void MIPrinter::print(const MachineInstr &MI) { - const auto *MF = MI.getParent()->getParent(); + const auto *MF = MI.getMF(); const auto &MRI = MF->getRegInfo(); const auto &SubTarget = MF->getSubtarget(); const auto *TRI = SubTarget.getRegisterInfo(); @@ -698,16 +658,16 @@ void MIPrinter::print(const MachineInstr &MI) { assert(MI.getNumOperands() == 1 && "Expected 1 operand in CFI instruction"); SmallBitVector PrintedTypes(8); - bool ShouldPrintRegisterTies = hasComplexRegisterTies(MI); + bool ShouldPrintRegisterTies = MI.hasComplexRegisterTies(); unsigned I = 0, E = MI.getNumOperands(); for (; I < E && MI.getOperand(I).isReg() && MI.getOperand(I).isDef() && !MI.getOperand(I).isImplicit(); ++I) { if (I) OS << ", "; - print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies, - getTypeToPrint(MI, I, PrintedTypes, MRI), - /*IsDef=*/true); + print(MI, I, TRI, ShouldPrintRegisterTies, + MI.getTypeToPrint(I, PrintedTypes, MRI), + /*PrintDef=*/false); } if (I) @@ -722,8 +682,8 @@ void MIPrinter::print(const MachineInstr &MI) { for (; I < E; ++I) { if (NeedComma) OS << ", "; - print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies, - getTypeToPrint(MI, I, PrintedTypes, MRI)); + print(MI, I, TRI, ShouldPrintRegisterTies, + MI.getTypeToPrint(I, PrintedTypes, MRI)); NeedComma = true; 
} @@ -736,7 +696,7 @@ void MIPrinter::print(const MachineInstr &MI) { if (!MI.memoperands_empty()) { OS << " :: "; - const LLVMContext &Context = MF->getFunction()->getContext(); + const LLVMContext &Context = MF->getFunction().getContext(); bool NeedComma = false; for (const auto *Op : MI.memoperands()) { if (NeedComma) @@ -747,14 +707,6 @@ void MIPrinter::print(const MachineInstr &MI) { } } -void MIPrinter::printMBBReference(const MachineBasicBlock &MBB) { - OS << "%bb." << MBB.getNumber(); - if (const auto *BB = MBB.getBasicBlock()) { - if (BB->hasName()) - OS << '.' << BB->getName(); - } -} - static void printIRSlotNumber(raw_ostream &OS, int Slot) { if (Slot == -1) OS << "<badref>"; @@ -806,13 +758,8 @@ void MIPrinter::printStackObjectReference(int FrameIndex) { assert(ObjectInfo != StackObjectOperandMapping.end() && "Invalid frame index"); const FrameIndexOperand &Operand = ObjectInfo->second; - if (Operand.IsFixed) { - OS << "%fixed-stack." << Operand.ID; - return; - } - OS << "%stack." << Operand.ID; - if (!Operand.Name.empty()) - OS << '.' 
<< Operand.Name; + MachineOperand::printStackObjectReference(OS, Operand.ID, Operand.IsFixed, + Operand.Name); } void MIPrinter::printOffset(int64_t Offset) { @@ -825,154 +772,43 @@ void MIPrinter::printOffset(int64_t Offset) { OS << " + " << Offset; } -static const char *getTargetFlagName(const TargetInstrInfo *TII, unsigned TF) { - auto Flags = TII->getSerializableDirectMachineOperandTargetFlags(); - for (const auto &I : Flags) { - if (I.first == TF) { - return I.second; - } - } - return nullptr; -} - -void MIPrinter::printTargetFlags(const MachineOperand &Op) { - if (!Op.getTargetFlags()) - return; - const auto *TII = - Op.getParent()->getParent()->getParent()->getSubtarget().getInstrInfo(); - assert(TII && "expected instruction info"); - auto Flags = TII->decomposeMachineOperandsTargetFlags(Op.getTargetFlags()); - OS << "target-flags("; - const bool HasDirectFlags = Flags.first; - const bool HasBitmaskFlags = Flags.second; - if (!HasDirectFlags && !HasBitmaskFlags) { - OS << "<unknown>) "; - return; - } - if (HasDirectFlags) { - if (const auto *Name = getTargetFlagName(TII, Flags.first)) - OS << Name; - else - OS << "<unknown target flag>"; - } - if (!HasBitmaskFlags) { - OS << ") "; - return; - } - bool IsCommaNeeded = HasDirectFlags; - unsigned BitMask = Flags.second; - auto BitMasks = TII->getSerializableBitmaskMachineOperandTargetFlags(); - for (const auto &Mask : BitMasks) { - // Check if the flag's bitmask has the bits of the current mask set. - if ((BitMask & Mask.first) == Mask.first) { - if (IsCommaNeeded) - OS << ", "; - IsCommaNeeded = true; - OS << Mask.second; - // Clear the bits which were serialized from the flag's bitmask. - BitMask &= ~(Mask.first); - } - } - if (BitMask) { - // When the resulting flag's bitmask isn't zero, we know that we didn't - // serialize all of the bit flags. 
- if (IsCommaNeeded) - OS << ", "; - OS << "<unknown bitmask target flag>"; - } - OS << ") "; -} - -static const char *getTargetIndexName(const MachineFunction &MF, int Index) { - const auto *TII = MF.getSubtarget().getInstrInfo(); - assert(TII && "expected instruction info"); - auto Indices = TII->getSerializableTargetIndices(); - for (const auto &I : Indices) { - if (I.first == Index) { - return I.second; - } - } - return nullptr; -} - -void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI, - unsigned I, bool ShouldPrintRegisterTies, LLT TypeToPrint, - bool IsDef) { - printTargetFlags(Op); +void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx, + const TargetRegisterInfo *TRI, + bool ShouldPrintRegisterTies, LLT TypeToPrint, + bool PrintDef) { + const MachineOperand &Op = MI.getOperand(OpIdx); switch (Op.getType()) { - case MachineOperand::MO_Register: - if (Op.isImplicit()) - OS << (Op.isDef() ? "implicit-def " : "implicit "); - else if (!IsDef && Op.isDef()) - // Print the 'def' flag only when the operand is defined after '='. - OS << "def "; - if (Op.isInternalRead()) - OS << "internal "; - if (Op.isDead()) - OS << "dead "; - if (Op.isKill()) - OS << "killed "; - if (Op.isUndef()) - OS << "undef "; - if (Op.isEarlyClobber()) - OS << "early-clobber "; - if (Op.isDebug()) - OS << "debug-use "; - printReg(Op.getReg(), OS, TRI); - // Print the sub register. - if (Op.getSubReg() != 0) - OS << '.' 
<< TRI->getSubRegIndexName(Op.getSubReg()); - if (ShouldPrintRegisterTies && Op.isTied() && !Op.isDef()) - OS << "(tied-def " << Op.getParent()->findTiedOperandIdx(I) << ")"; - if (TypeToPrint.isValid()) - OS << '(' << TypeToPrint << ')'; - break; case MachineOperand::MO_Immediate: - OS << Op.getImm(); - break; + if (MI.isOperandSubregIdx(OpIdx)) { + MachineOperand::printTargetFlags(OS, Op); + MachineOperand::printSubregIdx(OS, Op.getImm(), TRI); + break; + } + LLVM_FALLTHROUGH; + case MachineOperand::MO_Register: case MachineOperand::MO_CImmediate: - Op.getCImm()->printAsOperand(OS, /*PrintType=*/true, MST); - break; - case MachineOperand::MO_FPImmediate: - Op.getFPImm()->printAsOperand(OS, /*PrintType=*/true, MST); - break; case MachineOperand::MO_MachineBasicBlock: - printMBBReference(*Op.getMBB()); - break; - case MachineOperand::MO_FrameIndex: - printStackObjectReference(Op.getIndex()); - break; case MachineOperand::MO_ConstantPoolIndex: - OS << "%const." << Op.getIndex(); - printOffset(Op.getOffset()); - break; case MachineOperand::MO_TargetIndex: - OS << "target-index("; - if (const auto *Name = getTargetIndexName( - *Op.getParent()->getParent()->getParent(), Op.getIndex())) - OS << Name; - else - OS << "<unknown>"; - OS << ')'; - printOffset(Op.getOffset()); - break; case MachineOperand::MO_JumpTableIndex: - OS << "%jump-table." 
<< Op.getIndex(); - break; - case MachineOperand::MO_ExternalSymbol: { - StringRef Name = Op.getSymbolName(); - OS << '$'; - if (Name.empty()) { - OS << "\"\""; - } else { - printLLVMNameWithoutPrefix(OS, Name); - } - printOffset(Op.getOffset()); + case MachineOperand::MO_ExternalSymbol: + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_RegisterLiveOut: + case MachineOperand::MO_Metadata: + case MachineOperand::MO_MCSymbol: { + unsigned TiedOperandIdx = 0; + if (ShouldPrintRegisterTies && Op.isReg() && Op.isTied() && !Op.isDef()) + TiedOperandIdx = Op.getParent()->findTiedOperandIdx(OpIdx); + const TargetIntrinsicInfo *TII = MI.getMF()->getTarget().getIntrinsicInfo(); + Op.print(OS, MST, TypeToPrint, PrintDef, ShouldPrintRegisterTies, + TiedOperandIdx, TRI, TII); break; } - case MachineOperand::MO_GlobalAddress: - Op.getGlobal()->printAsOperand(OS, /*PrintType=*/false, MST); - printOffset(Op.getOffset()); + case MachineOperand::MO_FPImmediate: + Op.getFPImm()->printAsOperand(OS, /*PrintType=*/true, MST); + break; + case MachineOperand::MO_FrameIndex: + printStackObjectReference(Op.getIndex()); break; case MachineOperand::MO_BlockAddress: OS << "blockaddress("; @@ -991,29 +827,8 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI, printCustomRegMask(Op.getRegMask(), OS, TRI); break; } - case MachineOperand::MO_RegisterLiveOut: { - const uint32_t *RegMask = Op.getRegLiveOut(); - OS << "liveout("; - bool IsCommaNeeded = false; - for (unsigned Reg = 0, E = TRI->getNumRegs(); Reg < E; ++Reg) { - if (RegMask[Reg / 32] & (1U << (Reg % 32))) { - if (IsCommaNeeded) - OS << ", "; - printReg(Reg, OS, TRI); - IsCommaNeeded = true; - } - } - OS << ")"; - break; - } - case MachineOperand::MO_Metadata: - Op.getMetadata()->printAsOperand(OS, MST); - break; - case MachineOperand::MO_MCSymbol: - OS << "<mcsymbol " << *Op.getMCSymbol() << ">"; - break; case MachineOperand::MO_CFIIndex: { - const MachineFunction &MF = 
*Op.getParent()->getParent()->getParent(); + const MachineFunction &MF = *Op.getParent()->getMF(); print(MF.getFrameInstructions()[Op.getCFIIndex()], TRI); break; } @@ -1022,7 +837,7 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI, if (ID < Intrinsic::num_intrinsics) OS << "intrinsic(@" << Intrinsic::getName(ID, None) << ')'; else { - const MachineFunction &MF = *Op.getParent()->getParent()->getParent(); + const MachineFunction &MF = *Op.getParent()->getMF(); const TargetIntrinsicInfo *TII = MF.getTarget().getIntrinsicInfo(); OS << "intrinsic(@" << TII->getName(ID) << ')'; } @@ -1068,12 +883,12 @@ void MIPrinter::print(const LLVMContext &Context, const TargetInstrInfo &TII, if (Op.getFlags() & MachineMemOperand::MOTargetFlag3) OS << '"' << getTargetMMOFlagName(TII, MachineMemOperand::MOTargetFlag3) << "\" "; + + assert((Op.isLoad() || Op.isStore()) && "machine memory operand must be a load or store (or both)"); if (Op.isLoad()) OS << "load "; - else { - assert(Op.isStore() && "Non load machine operand must be a store"); + if (Op.isStore()) OS << "store "; - } printSyncScope(Context, Op.getSyncScopeID()); @@ -1084,10 +899,12 @@ void MIPrinter::print(const LLVMContext &Context, const TargetInstrInfo &TII, OS << Op.getSize(); if (const Value *Val = Op.getValue()) { - OS << (Op.isLoad() ? " from " : " into "); + OS << ((Op.isLoad() && Op.isStore()) ? " on " + : Op.isLoad() ? " from " : " into "); printIRValueReference(*Val); } else if (const PseudoSourceValue *PVal = Op.getPseudoValue()) { - OS << (Op.isLoad() ? " from " : " into "); + OS << ((Op.isLoad() && Op.isStore()) ? " on " + : Op.isLoad() ? 
" from " : " into "); assert(PVal && "Expected a pseudo source value"); switch (PVal->kind()) { case PseudoSourceValue::Stack: @@ -1168,7 +985,7 @@ static void printCFIRegister(unsigned DwarfReg, raw_ostream &OS, OS << "<badreg>"; return; } - printReg(Reg, OS, TRI); + OS << printReg(Reg, TRI); } void MIPrinter::print(const MCCFIInstruction &CFI, @@ -1176,36 +993,96 @@ void MIPrinter::print(const MCCFIInstruction &CFI, switch (CFI.getOperation()) { case MCCFIInstruction::OpSameValue: OS << "same_value "; - if (CFI.getLabel()) - OS << "<mcsymbol> "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); printCFIRegister(CFI.getRegister(), OS, TRI); break; + case MCCFIInstruction::OpRememberState: + OS << "remember_state "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); + break; + case MCCFIInstruction::OpRestoreState: + OS << "restore_state "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); + break; case MCCFIInstruction::OpOffset: OS << "offset "; - if (CFI.getLabel()) - OS << "<mcsymbol> "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); printCFIRegister(CFI.getRegister(), OS, TRI); OS << ", " << CFI.getOffset(); break; case MCCFIInstruction::OpDefCfaRegister: OS << "def_cfa_register "; - if (CFI.getLabel()) - OS << "<mcsymbol> "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); printCFIRegister(CFI.getRegister(), OS, TRI); break; case MCCFIInstruction::OpDefCfaOffset: OS << "def_cfa_offset "; - if (CFI.getLabel()) - OS << "<mcsymbol> "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); OS << CFI.getOffset(); break; case MCCFIInstruction::OpDefCfa: OS << "def_cfa "; - if (CFI.getLabel()) - OS << "<mcsymbol> "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); printCFIRegister(CFI.getRegister(), OS, TRI); OS << ", " << CFI.getOffset(); 
break; + case MCCFIInstruction::OpRelOffset: + OS << "rel_offset "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); + printCFIRegister(CFI.getRegister(), OS, TRI); + OS << ", " << CFI.getOffset(); + break; + case MCCFIInstruction::OpAdjustCfaOffset: + OS << "adjust_cfa_offset "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); + OS << CFI.getOffset(); + break; + case MCCFIInstruction::OpRestore: + OS << "restore "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); + printCFIRegister(CFI.getRegister(), OS, TRI); + break; + case MCCFIInstruction::OpEscape: { + OS << "escape "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); + if (!CFI.getValues().empty()) { + size_t e = CFI.getValues().size() - 1; + for (size_t i = 0; i < e; ++i) + OS << format("0x%02x", uint8_t(CFI.getValues()[i])) << ", "; + OS << format("0x%02x", uint8_t(CFI.getValues()[e])) << ", "; + } + break; + } + case MCCFIInstruction::OpUndefined: + OS << "undefined "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); + printCFIRegister(CFI.getRegister(), OS, TRI); + break; + case MCCFIInstruction::OpRegister: + OS << "register "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); + printCFIRegister(CFI.getRegister(), OS, TRI); + OS << ", "; + printCFIRegister(CFI.getRegister2(), OS, TRI); + break; + case MCCFIInstruction::OpWindowSave: + OS << "window_save "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); + break; default: // TODO: Print the other CFI Operations. 
OS << "<unserializable cfi operation>"; diff --git a/lib/CodeGen/MIRPrintingPass.cpp b/lib/CodeGen/MIRPrintingPass.cpp index 09354cf70c3c..1a8427430ea0 100644 --- a/lib/CodeGen/MIRPrintingPass.cpp +++ b/lib/CodeGen/MIRPrintingPass.cpp @@ -14,7 +14,6 @@ #include "llvm/CodeGen/MIRPrinter.h" -#include "llvm/CodeGen/MIRYamlMapping.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 81597afe6b02..209abf34d885 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -13,7 +13,7 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" @@ -21,6 +21,9 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfoMetadata.h" @@ -30,10 +33,7 @@ #include "llvm/Support/DataTypes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> using namespace llvm; @@ -42,6 +42,8 @@ using namespace llvm; MachineBasicBlock::MachineBasicBlock(MachineFunction &MF, const BasicBlock *B) : BB(B), Number(-1), xParent(&MF) { Insts.Parent = this; + if (B) + IrrLoopHeaderWeight = B->getIrrLoopHeaderWeight(); } MachineBasicBlock::~MachineBasicBlock() { @@ -68,6 +70,10 @@ raw_ostream 
&llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) { return OS; } +Printable llvm::printMBBReference(const MachineBasicBlock &MBB) { + return Printable([&MBB](raw_ostream &OS) { return MBB.printAsOperand(OS); }); +} + /// When an MBB is added to an MF, we need to update the parent pointer of the /// MBB, the MBB numbering, and any instructions in the MBB to be on the right /// operand list for registers. @@ -111,7 +117,7 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) { assert(N->getParent() && "machine instruction not in a basic block"); // Remove from the use/def lists. - if (MachineFunction *MF = N->getParent()->getParent()) + if (MachineFunction *MF = N->getMF()) N->RemoveRegOperandsFromUseLists(MF->getRegInfo()); N->setParent(nullptr); @@ -261,8 +267,8 @@ void MachineBasicBlock::print(raw_ostream &OS, const SlotIndexes *Indexes) << " is null\n"; return; } - const Function *F = MF->getFunction(); - const Module *M = F ? F->getParent() : nullptr; + const Function &F = MF->getFunction(); + const Module *M = F.getParent(); ModuleSlotTracker MST(M); print(OS, MST, Indexes); } @@ -279,7 +285,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, if (Indexes) OS << Indexes->getMBBStartIdx(this) << '\t'; - OS << "BB#" << getNumber() << ": "; + OS << printMBBReference(*this) << ": "; const char *Comma = ""; if (const BasicBlock *LBB = getBasicBlock()) { @@ -300,7 +306,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, if (Indexes) OS << '\t'; OS << " Live Ins:"; for (const auto &LI : LiveIns) { - OS << ' ' << PrintReg(LI.PhysReg, TRI); + OS << ' ' << printReg(LI.PhysReg, TRI); if (!LI.LaneMask.all()) OS << ':' << PrintLaneMask(LI.LaneMask); } @@ -311,7 +317,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, if (Indexes) OS << '\t'; OS << " Predecessors according to CFG:"; for (const_pred_iterator PI = pred_begin(), E = pred_end(); PI != E; ++PI) - OS << " BB#" 
<< (*PI)->getNumber(); + OS << " " << printMBBReference(*(*PI)); OS << '\n'; } @@ -332,17 +338,23 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, if (Indexes) OS << '\t'; OS << " Successors according to CFG:"; for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) { - OS << " BB#" << (*SI)->getNumber(); + OS << " " << printMBBReference(*(*SI)); if (!Probs.empty()) OS << '(' << *getProbabilityIterator(SI) << ')'; } OS << '\n'; } + if (IrrLoopHeaderWeight) { + if (Indexes) OS << '\t'; + OS << " Irreducible loop header weight: " + << IrrLoopHeaderWeight.getValue(); + OS << '\n'; + } } void MachineBasicBlock::printAsOperand(raw_ostream &OS, bool /*PrintType*/) const { - OS << "BB#" << getNumber(); + OS << "%bb." << getNumber(); } void MachineBasicBlock::removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) { @@ -759,10 +771,9 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock(); MF->insert(std::next(MachineFunction::iterator(this)), NMBB); - DEBUG(dbgs() << "Splitting critical edge:" - " BB#" << getNumber() - << " -- BB#" << NMBB->getNumber() - << " -- BB#" << Succ->getNumber() << '\n'); + DEBUG(dbgs() << "Splitting critical edge: " << printMBBReference(*this) + << " -- " << printMBBReference(*NMBB) << " -- " + << printMBBReference(*Succ) << '\n'); LiveIntervals *LIS = P.getAnalysisIfAvailable<LiveIntervals>(); SlotIndexes *Indexes = P.getAnalysisIfAvailable<SlotIndexes>(); @@ -1015,8 +1026,8 @@ bool MachineBasicBlock::canSplitCriticalEdge( // case that we can't handle. Since this never happens in properly optimized // code, just skip those edges. 
if (TBB && TBB == FBB) { - DEBUG(dbgs() << "Won't split critical edge after degenerate BB#" - << getNumber() << '\n'); + DEBUG(dbgs() << "Won't split critical edge after degenerate " + << printMBBReference(*this) << '\n'); return false; } return true; diff --git a/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/lib/CodeGen/MachineBlockFrequencyInfo.cpp index 4d1ec11df46c..3459a9f71a73 100644 --- a/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -12,23 +12,23 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/iterator.h" #include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Format.h" #include "llvm/Support/GraphWriter.h" -#include "llvm/Support/raw_ostream.h" +#include <string> using namespace llvm; #define DEBUG_TYPE "machine-block-freq" - static cl::opt<GVDAGType> ViewMachineBlockFreqPropagationDAG( "view-machine-block-freq-propagation-dags", cl::Hidden, cl::desc("Pop up a window to show a dag displaying how machine block " @@ -42,6 +42,7 @@ static cl::opt<GVDAGType> ViewMachineBlockFreqPropagationDAG( "integer fractional block frequency representation."), clEnumValN(GVDT_Count, "count", "display a graph using the real " "profile count if available."))); + // Similar option above, but used to control BFI display only after MBP pass cl::opt<GVDAGType> ViewBlockLayoutWithBFI( "view-block-layout-with-bfi", cl::Hidden, @@ -62,10 +63,19 @@ cl::opt<GVDAGType> ViewBlockLayoutWithBFI( // Command line option 
to specify the name of the function for CFG dump // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name= extern cl::opt<std::string> ViewBlockFreqFuncName; + // Command line option to specify hot frequency threshold. // Defined in Analysis/BlockFrequencyInfo.cpp: -view-hot-freq-perc= extern cl::opt<unsigned> ViewHotFreqPercent; +static cl::opt<bool> PrintMachineBlockFreq( + "print-machine-bfi", cl::init(false), cl::Hidden, + cl::desc("Print the machine block frequency info.")); + +// Command line option to specify the name of the function for block frequency +// dump. Defined in Analysis/BlockFrequencyInfo.cpp. +extern cl::opt<std::string> PrintBlockFreqFuncName; + static GVDAGType getGVDT() { if (ViewBlockLayoutWithBFI != GVDT_None) return ViewBlockLayoutWithBFI; @@ -76,9 +86,9 @@ static GVDAGType getGVDT() { namespace llvm { template <> struct GraphTraits<MachineBlockFrequencyInfo *> { - typedef const MachineBasicBlock *NodeRef; - typedef MachineBasicBlock::const_succ_iterator ChildIteratorType; - typedef pointer_iterator<MachineFunction::const_iterator> nodes_iterator; + using NodeRef = const MachineBasicBlock *; + using ChildIteratorType = MachineBasicBlock::const_succ_iterator; + using nodes_iterator = pointer_iterator<MachineFunction::const_iterator>; static NodeRef getEntryNode(const MachineBlockFrequencyInfo *G) { return &G->getFunction()->front(); @@ -99,21 +109,21 @@ template <> struct GraphTraits<MachineBlockFrequencyInfo *> { } }; -typedef BFIDOTGraphTraitsBase<MachineBlockFrequencyInfo, - MachineBranchProbabilityInfo> - MBFIDOTGraphTraitsBase; +using MBFIDOTGraphTraitsBase = + BFIDOTGraphTraitsBase<MachineBlockFrequencyInfo, + MachineBranchProbabilityInfo>; + template <> struct DOTGraphTraits<MachineBlockFrequencyInfo *> : public MBFIDOTGraphTraitsBase { - explicit DOTGraphTraits(bool isSimple = false) - : MBFIDOTGraphTraitsBase(isSimple), CurFunc(nullptr), LayoutOrderMap() {} - - const MachineFunction *CurFunc; + const MachineFunction 
*CurFunc = nullptr; DenseMap<const MachineBasicBlock *, int> LayoutOrderMap; + explicit DOTGraphTraits(bool isSimple = false) + : MBFIDOTGraphTraitsBase(isSimple) {} + std::string getNodeLabel(const MachineBasicBlock *Node, const MachineBlockFrequencyInfo *Graph) { - int layout_order = -1; // Attach additional ordering information if 'isSimple' is false. if (!isSimple()) { @@ -163,7 +173,7 @@ MachineBlockFrequencyInfo::MachineBlockFrequencyInfo() initializeMachineBlockFrequencyInfoPass(*PassRegistry::getPassRegistry()); } -MachineBlockFrequencyInfo::~MachineBlockFrequencyInfo() {} +MachineBlockFrequencyInfo::~MachineBlockFrequencyInfo() = default; void MachineBlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<MachineBranchProbabilityInfo>(); @@ -183,6 +193,11 @@ void MachineBlockFrequencyInfo::calculate( F.getName().equals(ViewBlockFreqFuncName))) { view("MachineBlockFrequencyDAGS." + F.getName()); } + if (PrintMachineBlockFreq && + (PrintBlockFreqFuncName.empty() || + F.getName().equals(PrintBlockFreqFuncName))) { + MBFI->print(dbgs()); + } } bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) { @@ -209,14 +224,20 @@ MachineBlockFrequencyInfo::getBlockFreq(const MachineBasicBlock *MBB) const { Optional<uint64_t> MachineBlockFrequencyInfo::getBlockProfileCount( const MachineBasicBlock *MBB) const { - const Function *F = MBFI->getFunction()->getFunction(); - return MBFI ? MBFI->getBlockProfileCount(*F, MBB) : None; + const Function &F = MBFI->getFunction()->getFunction(); + return MBFI ? MBFI->getBlockProfileCount(F, MBB) : None; } Optional<uint64_t> MachineBlockFrequencyInfo::getProfileCountFromFreq(uint64_t Freq) const { - const Function *F = MBFI->getFunction()->getFunction(); - return MBFI ? MBFI->getProfileCountFromFreq(*F, Freq) : None; + const Function &F = MBFI->getFunction()->getFunction(); + return MBFI ? 
MBFI->getProfileCountFromFreq(F, Freq) : None; +} + +bool +MachineBlockFrequencyInfo::isIrrLoopHeader(const MachineBasicBlock *MBB) { + assert(MBFI && "Expected analysis to be available"); + return MBFI->isIrrLoopHeader(MBB); } const MachineFunction *MachineBlockFrequencyInfo::getFunction() const { diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 447ad629885b..4ce689607730 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -1,4 +1,4 @@ -//===-- MachineBlockPlacement.cpp - Basic Block Code Layout optimization --===// +//===- MachineBlockPlacement.cpp - Basic Block Code Layout optimization ---===// // // The LLVM Compiler Infrastructure // @@ -26,7 +26,10 @@ //===----------------------------------------------------------------------===// #include "BranchFolding.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -39,19 +42,33 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachinePostDominators.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TailDuplicator.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Function.h" +#include "llvm/Pass.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/BlockFrequency.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include 
"llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Target/TargetMachine.h" #include <algorithm> -#include <functional> +#include <cassert> +#include <cstdint> +#include <iterator> +#include <memory> +#include <string> +#include <tuple> #include <utility> +#include <vector> + using namespace llvm; #define DEBUG_TYPE "block-placement" @@ -91,11 +108,17 @@ static cl::opt<unsigned> LoopToColdBlockRatio( "(frequency of block) is greater than this ratio"), cl::init(5), cl::Hidden); +static cl::opt<bool> ForceLoopColdBlock( + "force-loop-cold-block", + cl::desc("Force outlining cold blocks from loops."), + cl::init(false), cl::Hidden); + static cl::opt<bool> PreciseRotationCost("precise-rotation-cost", cl::desc("Model the cost of loop rotation more " "precisely by using profile data."), cl::init(false), cl::Hidden); + static cl::opt<bool> ForcePreciseRotationCost("force-precise-rotation-cost", cl::desc("Force the use of precise cost " @@ -138,7 +161,7 @@ static cl::opt<unsigned> TailDupPlacementAggressiveThreshold( "tail-dup-placement-aggressive-threshold", cl::desc("Instruction cutoff for aggressive tail duplication during " "layout. Used at -O3. Tail merging during layout is forced to " - "have a threshold that won't conflict."), cl::init(3), + "have a threshold that won't conflict."), cl::init(4), cl::Hidden); // Heuristic for tail duplication. @@ -172,12 +195,12 @@ extern cl::opt<GVDAGType> ViewBlockLayoutWithBFI; extern cl::opt<std::string> ViewBlockFreqFuncName; namespace { + class BlockChain; + /// \brief Type for our function-wide basic block -> block chain mapping. -typedef DenseMap<const MachineBasicBlock *, BlockChain *> BlockToChainMapType; -} +using BlockToChainMapType = DenseMap<const MachineBasicBlock *, BlockChain *>; -namespace { /// \brief A chain of blocks which will be laid out contiguously. /// /// This is the datastructure representing a chain of consecutive blocks that @@ -211,14 +234,14 @@ public: /// function. 
It also registers itself as the chain that block participates /// in with the BlockToChain mapping. BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB) - : Blocks(1, BB), BlockToChain(BlockToChain), UnscheduledPredecessors(0) { + : Blocks(1, BB), BlockToChain(BlockToChain) { assert(BB && "Cannot create a chain with a null basic block"); BlockToChain[BB] = this; } /// \brief Iterator over blocks within the chain. - typedef SmallVectorImpl<MachineBasicBlock *>::iterator iterator; - typedef SmallVectorImpl<MachineBasicBlock *>::const_iterator const_iterator; + using iterator = SmallVectorImpl<MachineBasicBlock *>::iterator; + using const_iterator = SmallVectorImpl<MachineBasicBlock *>::const_iterator; /// \brief Beginning of blocks within the chain. iterator begin() { return Blocks.begin(); } @@ -286,14 +309,12 @@ public: /// /// Note: This field is reinitialized multiple times - once for each loop, /// and then once for the function as a whole. - unsigned UnscheduledPredecessors; + unsigned UnscheduledPredecessors = 0; }; -} -namespace { class MachineBlockPlacement : public MachineFunctionPass { - /// \brief A typedef for a block filter set. - typedef SmallSetVector<const MachineBasicBlock *, 16> BlockFilterSet; + /// \brief A type for a block filter set. 
+ using BlockFilterSet = SmallSetVector<const MachineBasicBlock *, 16>; /// Pair struct containing basic block and taildup profitiability struct BlockAndTailDupResult { @@ -428,6 +449,7 @@ class MachineBlockPlacement : public MachineFunctionPass { void fillWorkLists(const MachineBasicBlock *MBB, SmallPtrSetImpl<BlockChain *> &UpdatedPreds, const BlockFilterSet *BlockFilter); + void buildChain(const MachineBasicBlock *BB, BlockChain &Chain, BlockFilterSet *BlockFilter = nullptr); MachineBasicBlock *findBestLoopTop( @@ -454,31 +476,37 @@ class MachineBlockPlacement : public MachineFunctionPass { const MachineBasicBlock *BB, const MachineBasicBlock *Succ, BranchProbability AdjustedSumProb, const BlockChain &Chain, const BlockFilterSet *BlockFilter); + /// Check for a trellis layout. bool isTrellis(const MachineBasicBlock *BB, const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs, const BlockChain &Chain, const BlockFilterSet *BlockFilter); + /// Get the best successor given a trellis layout. BlockAndTailDupResult getBestTrellisSuccessor( const MachineBasicBlock *BB, const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs, BranchProbability AdjustedSumProb, const BlockChain &Chain, const BlockFilterSet *BlockFilter); + /// Get the best pair of non-conflicting edges. static std::pair<WeightedEdge, WeightedEdge> getBestNonConflictingEdges( const MachineBasicBlock *BB, MutableArrayRef<SmallVector<WeightedEdge, 8>> Edges); + /// Returns true if a block can tail duplicate into all unplaced /// predecessors. Filters based on loop. bool canTailDuplicateUnplacedPreds( const MachineBasicBlock *BB, MachineBasicBlock *Succ, const BlockChain &Chain, const BlockFilterSet *BlockFilter); + /// Find chains of triangles to tail-duplicate where a global analysis works, /// but a local analysis would not find them. 
void precomputeTriangleChains(); public: static char ID; // Pass identification, replacement for typeid + MachineBlockPlacement() : MachineFunctionPass(ID) { initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry()); } @@ -495,10 +523,13 @@ public: MachineFunctionPass::getAnalysisUsage(AU); } }; -} + +} // end anonymous namespace char MachineBlockPlacement::ID = 0; + char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID; + INITIALIZE_PASS_BEGIN(MachineBlockPlacement, DEBUG_TYPE, "Branch Probability Basic Block Placement", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) @@ -515,7 +546,7 @@ INITIALIZE_PASS_END(MachineBlockPlacement, DEBUG_TYPE, static std::string getBlockName(const MachineBasicBlock *BB) { std::string Result; raw_string_ostream OS(Result); - OS << "BB#" << BB->getNumber(); + OS << printMBBReference(*BB); OS << " ('" << BB->getName() << "')"; OS.flush(); return Result; @@ -1094,6 +1125,7 @@ bool MachineBlockPlacement::canTailDuplicateUnplacedPreds( void MachineBlockPlacement::precomputeTriangleChains() { struct TriangleChain { std::vector<MachineBasicBlock *> Edges; + TriangleChain(MachineBasicBlock *src, MachineBasicBlock *dst) : Edges({src, dst}) {} @@ -1203,7 +1235,7 @@ void MachineBlockPlacement::precomputeTriangleChains() { // When profile is available, we need to handle the triangle-shape CFG. static BranchProbability getLayoutSuccessorProbThreshold( const MachineBasicBlock *BB) { - if (!BB->getParent()->getFunction()->getEntryCount()) + if (!BB->getParent()->getFunction().getEntryCount()) return BranchProbability(StaticLikelyProb, 100); if (BB->succ_size() == 2) { const MachineBasicBlock *Succ1 = *BB->succ_begin(); @@ -1534,10 +1566,10 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( // worklist of already placed entries. // FIXME: If this shows up on profiles, it could be folded (at the cost of // some code complexity) into the loop below. 
- WorkList.erase(remove_if(WorkList, - [&](MachineBasicBlock *BB) { - return BlockToChain.lookup(BB) == &Chain; - }), + WorkList.erase(llvm::remove_if(WorkList, + [&](MachineBasicBlock *BB) { + return BlockToChain.lookup(BB) == &Chain; + }), WorkList.end()); if (WorkList.empty()) @@ -1659,7 +1691,7 @@ void MachineBlockPlacement::buildChain( const MachineBasicBlock *LoopHeaderBB = HeadBB; markChainSuccessors(Chain, LoopHeaderBB, BlockFilter); MachineBasicBlock *BB = *std::prev(Chain.end()); - for (;;) { + while (true) { assert(BB && "null block found at end of chain in loop."); assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match in loop."); assert(*std::prev(Chain.end()) == BB && "BB Not found at end of chain."); @@ -1737,7 +1769,7 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L, // i.e. when the layout predecessor does not fallthrough to the loop header. // In practice this never happens though: there always seems to be a preheader // that can fallthrough and that is also placed before the header. 
- if (F->getFunction()->optForSize()) + if (F->getFunction().optForSize()) return L.getHeader(); // Check that the header hasn't been fused with a preheader block due to @@ -1945,7 +1977,7 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, } } - BlockChain::iterator ExitIt = find(LoopChain, ExitingBB); + BlockChain::iterator ExitIt = llvm::find(LoopChain, ExitingBB); if (ExitIt == LoopChain.end()) return; @@ -1999,7 +2031,7 @@ void MachineBlockPlacement::rotateLoopWithProfile( BlockChain &LoopChain, const MachineLoop &L, const BlockFilterSet &LoopBlockSet) { auto HeaderBB = L.getHeader(); - auto HeaderIter = find(LoopChain, HeaderBB); + auto HeaderIter = llvm::find(LoopChain, HeaderBB); auto RotationPos = LoopChain.end(); BlockFrequency SmallestRotationCost = BlockFrequency::getMaxFrequency(); @@ -2146,7 +2178,7 @@ MachineBlockPlacement::collectLoopBlockSet(const MachineLoop &L) { // will be merged into the first outer loop chain for which this block is not // cold anymore. This needs precise profile data and we only do this when // profile data is available. - if (F->getFunction()->getEntryCount()) { + if (F->getFunction().getEntryCount() || ForceLoopColdBlock) { BlockFrequency LoopFreq(0); for (auto LoopPred : L.getHeader()->predecessors()) if (!L.contains(LoopPred)) @@ -2188,7 +2220,7 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) { // for better layout. bool RotateLoopWithProfile = ForcePreciseRotationCost || - (PreciseRotationCost && F->getFunction()->getEntryCount()); + (PreciseRotationCost && F->getFunction().getEntryCount()); // First check to see if there is an obviously preferable top block for the // loop. 
This will default to the header, but may end up as one of the @@ -2201,6 +2233,10 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) { // If we selected just the header for the loop top, look for a potentially // profitable exit block in the event that rotating the loop can eliminate // branches by placing an exit edge at the bottom. + // + // Loops are processed innermost to uttermost, make sure we clear + // PreferredLoopExit before processing a new loop. + PreferredLoopExit = nullptr; if (!RotateLoopWithProfile && LoopTop == L.getHeader()) PreferredLoopExit = findBestLoopExit(L, LoopBlockSet); @@ -2272,7 +2308,7 @@ void MachineBlockPlacement::buildCFGChains() { new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB); // Also, merge any blocks which we cannot reason about and must preserve // the exact fallthrough behavior for. - for (;;) { + while (true) { Cond.clear(); MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch. if (!TII->analyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough()) @@ -2313,7 +2349,7 @@ void MachineBlockPlacement::buildCFGChains() { buildChain(&F->front(), FunctionChain); #ifndef NDEBUG - typedef SmallPtrSet<MachineBasicBlock *, 16> FunctionBlockSetType; + using FunctionBlockSetType = SmallPtrSet<MachineBasicBlock *, 16>; #endif DEBUG({ // Crash at the end so we get all of the debugging output first. @@ -2449,7 +2485,7 @@ void MachineBlockPlacement::alignBlocks() { // exclusively on the loop info here so that we can align backedges in // unnatural CFGs and backedges that were introduced purely because of the // loop rotations done during this layout pass. - if (F->getFunction()->optForSize()) + if (F->getFunction().optForSize()) return; BlockChain &FunctionChain = *BlockToChain[&F->front()]; if (FunctionChain.begin() == FunctionChain.end()) @@ -2545,8 +2581,8 @@ bool MachineBlockPlacement::repeatedlyTailDuplicateBlock( // duplicated from here on are already scheduled. 
// Note that DuplicatedToLPred always implies Removed. while (DuplicatedToLPred) { - assert (Removed && "Block must have been removed to be duplicated into its " - "layout predecessor."); + assert(Removed && "Block must have been removed to be duplicated into its " + "layout predecessor."); MachineBasicBlock *DupBB, *DupPred; // The removal callback causes Chain.end() to be updated when a block is // removed. On the first pass through the loop, the chain end should be the @@ -2629,8 +2665,10 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock( if (RemBB->isEHPad()) RemoveList = EHPadWorkList; RemoveList.erase( - remove_if(RemoveList, - [RemBB](MachineBasicBlock *BB) {return BB == RemBB;}), + llvm::remove_if(RemoveList, + [RemBB](MachineBasicBlock *BB) { + return BB == RemBB; + }), RemoveList.end()); } @@ -2648,7 +2686,7 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock( << getBlockName(RemBB) << "\n"); }; auto RemovalCallbackRef = - llvm::function_ref<void(MachineBasicBlock*)>(RemovalCallback); + function_ref<void(MachineBasicBlock*)>(RemovalCallback); SmallVector<MachineBasicBlock *, 8> DuplicatedPreds; bool IsSimple = TailDup.isSimpleBB(BB); @@ -2677,7 +2715,7 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock( } bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; // Check for single-block functions and skip them. 
@@ -2722,9 +2760,10 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { if (TailDupPlacement) { MPDT = &getAnalysis<MachinePostDominatorTree>(); - if (MF.getFunction()->optForSize()) + if (MF.getFunction().optForSize()) TailDupSize = 1; - TailDup.initMF(MF, MBPI, /* LayoutMode */ true, TailDupSize); + bool PreRegAlloc = false; + TailDup.initMF(MF, PreRegAlloc, MBPI, /* LayoutMode */ true, TailDupSize); precomputeTriangleChains(); } @@ -2778,7 +2817,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { } if (ViewBlockLayoutWithBFI != GVDT_None && (ViewBlockFreqFuncName.empty() || - F->getFunction()->getName().equals(ViewBlockFreqFuncName))) { + F->getFunction().getName().equals(ViewBlockFreqFuncName))) { MBFI->view("MBP." + MF.getName(), false); } @@ -2789,6 +2828,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { } namespace { + /// \brief A pass to compute block placement statistics. /// /// A separate pass to compute interesting statistics for evaluating block @@ -2804,6 +2844,7 @@ class MachineBlockPlacementStats : public MachineFunctionPass { public: static char ID; // Pass identification, replacement for typeid + MachineBlockPlacementStats() : MachineFunctionPass(ID) { initializeMachineBlockPlacementStatsPass(*PassRegistry::getPassRegistry()); } @@ -2817,10 +2858,13 @@ public: MachineFunctionPass::getAnalysisUsage(AU); } }; -} + +} // end anonymous namespace char MachineBlockPlacementStats::ID = 0; + char &llvm::MachineBlockPlacementStatsID = MachineBlockPlacementStats::ID; + INITIALIZE_PASS_BEGIN(MachineBlockPlacementStats, "block-placement-stats", "Basic Block Placement Stats", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp index 21eff9dfff9c..e4952aaaba06 100644 --- a/lib/CodeGen/MachineBranchProbabilityInfo.cpp +++ 
b/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -84,7 +84,7 @@ raw_ostream &MachineBranchProbabilityInfo::printEdgeProbability( const MachineBasicBlock *Dst) const { const BranchProbability Prob = getEdgeProbability(Src, Dst); - OS << "edge MBB#" << Src->getNumber() << " -> MBB#" << Dst->getNumber() + OS << "edge " << printMBBReference(*Src) << " -> " << printMBBReference(*Dst) << " probability is " << Prob << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n"); diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 582ff139f886..53c0d840ac84 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -1,4 +1,4 @@ -//===-- MachineCSE.cpp - Machine Common Subexpression Elimination Pass ----===// +//===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass -----===// // // The LLVM Compiler Infrastructure // @@ -15,18 +15,35 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/ScopedHashTable.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Pass.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/RecyclingAllocator.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include <cassert> 
+#include <iterator> +#include <utility> +#include <vector> + using namespace llvm; #define DEBUG_TYPE "machine-cse" @@ -40,15 +57,18 @@ STATISTIC(NumCrossBBCSEs, STATISTIC(NumCommutes, "Number of copies coalesced after commuting"); namespace { + class MachineCSE : public MachineFunctionPass { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; AliasAnalysis *AA; MachineDominatorTree *DT; MachineRegisterInfo *MRI; + public: static char ID; // Pass identification - MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(0), CurrVN(0) { + + MachineCSE() : MachineFunctionPass(ID) { initializeMachineCSEPass(*PassRegistry::getPassRegistry()); } @@ -69,16 +89,18 @@ namespace { } private: - unsigned LookAheadLimit; - typedef RecyclingAllocator<BumpPtrAllocator, - ScopedHashTableVal<MachineInstr*, unsigned> > AllocatorTy; - typedef ScopedHashTable<MachineInstr*, unsigned, - MachineInstrExpressionTrait, AllocatorTy> ScopedHTType; - typedef ScopedHTType::ScopeTy ScopeType; - DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap; + using AllocatorTy = RecyclingAllocator<BumpPtrAllocator, + ScopedHashTableVal<MachineInstr *, unsigned>>; + using ScopedHTType = + ScopedHashTable<MachineInstr *, unsigned, MachineInstrExpressionTrait, + AllocatorTy>; + using ScopeType = ScopedHTType::ScopeTy; + + unsigned LookAheadLimit = 0; + DenseMap<MachineBasicBlock *, ScopeType *> ScopeMap; ScopedHTType VNT; - SmallVector<MachineInstr*, 64> Exps; - unsigned CurrVN; + SmallVector<MachineInstr *, 64> Exps; + unsigned CurrVN = 0; bool PerformTrivialCopyPropagation(MachineInstr *MI, MachineBasicBlock *MBB); @@ -104,10 +126,13 @@ namespace { DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren); bool PerformCSE(MachineDomTreeNode *Node); }; + } // end anonymous namespace char MachineCSE::ID = 0; + char &llvm::MachineCSEID = MachineCSE::ID; + INITIALIZE_PASS_BEGIN(MachineCSE, DEBUG_TYPE, "Machine Common Subexpression Elimination", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) @@ 
-225,8 +250,8 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, continue; if (TargetRegisterInfo::isVirtualRegister(Reg)) continue; - // Reading constant physregs is ok. - if (!MRI->isConstantPhysReg(Reg)) + // Reading either caller preserved or constant physregs is ok. + if (!MRI->isCallerPreservedOrConstPhysReg(Reg)) for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) PhysRefs.insert(*AI); } @@ -598,12 +623,12 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // Go through implicit defs of CSMI and MI, and clear the kill flags on // their uses in all the instructions between CSMI and MI. // We might have made some of the kill flags redundant, consider: - // subs ... %NZCV<imp-def> <- CSMI - // csinc ... %NZCV<imp-use,kill> <- this kill flag isn't valid anymore - // subs ... %NZCV<imp-def> <- MI, to be eliminated - // csinc ... %NZCV<imp-use,kill> + // subs ... implicit-def %nzcv <- CSMI + // csinc ... implicit killed %nzcv <- this kill flag isn't valid anymore + // subs ... implicit-def %nzcv <- MI, to be eliminated + // csinc ... implicit killed %nzcv // Since we eliminated MI, and reused a register imp-def'd by CSMI - // (here %NZCV), that register, if it was killed before MI, should have + // (here %nzcv), that register, if it was killed before MI, should have // that kill flag removed, because it's lifetime was extended. 
if (CSMI->getParent() == MI->getParent()) { for (MachineBasicBlock::iterator II = CSMI, IE = MI; II != IE; ++II) @@ -702,7 +727,7 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { } bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; TII = MF.getSubtarget().getInstrInfo(); diff --git a/lib/CodeGen/MachineCombiner.cpp b/lib/CodeGen/MachineCombiner.cpp index e6f80dbb8630..702d21228477 100644 --- a/lib/CodeGen/MachineCombiner.cpp +++ b/lib/CodeGen/MachineCombiner.cpp @@ -16,17 +16,17 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineTraceMetrics.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -34,6 +34,11 @@ using namespace llvm; STATISTIC(NumInstCombined, "Number of machineinst combined"); +static cl::opt<unsigned> +inc_threshold("machine-combiner-inc-threshold", cl::Hidden, + cl::desc("Incremental depth computation will be used for basic " + "blocks with more instructions."), cl::init(500)); + namespace { class MachineCombiner : public MachineFunctionPass { const TargetInstrInfo *TII; @@ -73,7 +78,7 @@ private: SmallVectorImpl<MachineInstr *> &InsInstrs, SmallVectorImpl<MachineInstr *> &DelInstrs, DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, - MachineCombinerPattern Pattern); + 
MachineCombinerPattern Pattern, bool SlackIsAccurate); bool preservesResourceLen(MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace, SmallVectorImpl<MachineInstr *> &InsInstrs, @@ -155,9 +160,10 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs, assert(DefInstr && "There must be a definition for a new virtual register"); DepthOp = InstrDepth[II->second]; - LatencyOp = TSchedModel.computeOperandLatency( - DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()), - InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg())); + int DefIdx = DefInstr->findRegisterDefOperandIdx(MO.getReg()); + int UseIdx = InstrPtr->findRegisterUseOperandIdx(MO.getReg()); + LatencyOp = TSchedModel.computeOperandLatency(DefInstr, DefIdx, + InstrPtr, UseIdx); } else { MachineInstr *DefInstr = getOperandDef(MO); if (DefInstr) { @@ -247,7 +253,8 @@ bool MachineCombiner::improvesCriticalPathLen( SmallVectorImpl<MachineInstr *> &InsInstrs, SmallVectorImpl<MachineInstr *> &DelInstrs, DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, - MachineCombinerPattern Pattern) { + MachineCombinerPattern Pattern, + bool SlackIsAccurate) { assert(TSchedModel.hasInstrSchedModelOrItineraries() && "Missing machine model\n"); // NewRoot is the last instruction in the \p InsInstrs vector. @@ -258,7 +265,7 @@ bool MachineCombiner::improvesCriticalPathLen( unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace); unsigned RootDepth = BlockTrace.getInstrCycles(*Root).Depth; - DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n"; + DEBUG(dbgs() << "DEPENDENCE DATA FOR " << *Root << "\n"; dbgs() << " NewRootDepth: " << NewRootDepth << "\n"; dbgs() << " RootDepth: " << RootDepth << "\n"); @@ -274,24 +281,32 @@ bool MachineCombiner::improvesCriticalPathLen( // of the original code sequence. This may allow the transform to proceed // even if the instruction depths (data dependency cycles) become worse. 
- unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace); - unsigned RootLatency = 0; + // Account for the latency of the inserted and deleted instructions by + // adding up their latencies. This assumes that the inserted and deleted + // instructions are dependent instruction chains, which might not hold + // in all cases. + unsigned NewRootLatency = 0; + for (unsigned i = 0; i < InsInstrs.size() - 1; i++) + NewRootLatency += TSchedModel.computeInstrLatency(InsInstrs[i]); + NewRootLatency += getLatency(Root, NewRoot, BlockTrace); + unsigned RootLatency = 0; for (auto I : DelInstrs) RootLatency += TSchedModel.computeInstrLatency(I); unsigned RootSlack = BlockTrace.getInstrSlack(*Root); - + unsigned NewCycleCount = NewRootDepth + NewRootLatency; + unsigned OldCycleCount = RootDepth + RootLatency + + (SlackIsAccurate ? RootSlack : 0); DEBUG(dbgs() << " NewRootLatency: " << NewRootLatency << "\n"; dbgs() << " RootLatency: " << RootLatency << "\n"; - dbgs() << " RootSlack: " << RootSlack << "\n"; + dbgs() << " RootSlack: " << RootSlack << " SlackIsAccurate=" + << SlackIsAccurate << "\n"; dbgs() << " NewRootDepth + NewRootLatency = " - << NewRootDepth + NewRootLatency << "\n"; + << NewCycleCount << "\n"; dbgs() << " RootDepth + RootLatency + RootSlack = " - << RootDepth + RootLatency + RootSlack << "\n";); - - unsigned NewCycleCount = NewRootDepth + NewRootLatency; - unsigned OldCycleCount = RootDepth + RootLatency + RootSlack; + << OldCycleCount << "\n"; + ); return NewCycleCount <= OldCycleCount; } @@ -354,17 +369,44 @@ bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) { return false; } +/// Inserts InsInstrs and deletes DelInstrs. Incrementally updates instruction +/// depths if requested. 
+/// +/// \param MBB basic block to insert instructions in +/// \param MI current machine instruction +/// \param InsInstrs new instructions to insert in \p MBB +/// \param DelInstrs instruction to delete from \p MBB +/// \param MinInstr is a pointer to the machine trace information +/// \param RegUnits set of live registers, needed to compute instruction depths +/// \param IncrementalUpdate if true, compute instruction depths incrementally, +/// otherwise invalidate the trace static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI, SmallVector<MachineInstr *, 16> InsInstrs, SmallVector<MachineInstr *, 16> DelInstrs, - MachineTraceMetrics *Traces) { + MachineTraceMetrics::Ensemble *MinInstr, + SparseSet<LiveRegUnit> &RegUnits, + bool IncrementalUpdate) { for (auto *InstrPtr : InsInstrs) MBB->insert((MachineBasicBlock::iterator)&MI, InstrPtr); - for (auto *InstrPtr : DelInstrs) + + for (auto *InstrPtr : DelInstrs) { InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval(); - ++NumInstCombined; - Traces->invalidate(MBB); - Traces->verifyAnalysis(); + // Erase all LiveRegs defined by the removed instruction + for (auto I = RegUnits.begin(); I != RegUnits.end(); ) { + if (I->MI == InstrPtr) + I = RegUnits.erase(I); + else + I++; + } + } + + if (IncrementalUpdate) + for (auto *InstrPtr : InsInstrs) + MinInstr->updateDepth(MBB, *InstrPtr, RegUnits); + else + MinInstr->invalidate(MBB); + + NumInstCombined++; } /// Substitute a slow code sequence with a faster one by @@ -378,9 +420,16 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { bool Changed = false; DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n"); + bool IncrementalUpdate = false; auto BlockIter = MBB->begin(); + decltype(BlockIter) LastUpdate; // Check if the block is in a loop. 
const MachineLoop *ML = MLI->getLoopFor(MBB); + if (!MinInstr) + MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); + + SparseSet<LiveRegUnit> RegUnits; + RegUnits.setUniverse(TRI->getNumRegUnits()); while (BlockIter != MBB->end()) { auto &MI = *BlockIter++; @@ -419,9 +468,6 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { SmallVector<MachineInstr *, 16> InsInstrs; SmallVector<MachineInstr *, 16> DelInstrs; DenseMap<unsigned, unsigned> InstrIdxForVirtReg; - if (!MinInstr) - MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); - Traces->verifyAnalysis(); TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs, InstrIdxForVirtReg); unsigned NewInstCount = InsInstrs.size(); @@ -436,23 +482,43 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { if (ML && TII->isThroughputPattern(P)) SubstituteAlways = true; + if (IncrementalUpdate) { + // Update depths since the last incremental update. + MinInstr->updateDepths(LastUpdate, BlockIter, RegUnits); + LastUpdate = BlockIter; + } + // Substitute when we optimize for codesize and the new sequence has // fewer instructions OR // the new sequence neither lengthens the critical path nor increases // resource pressure. if (SubstituteAlways || doSubstitute(NewInstCount, OldInstCount)) { - insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, Traces); + insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr, + RegUnits, IncrementalUpdate); // Eagerly stop after the first pattern fires. Changed = true; break; } else { - // Calculating the trace metrics may be expensive, - // so only do this when necessary. + // For big basic blocks, we only compute the full trace the first time + // we hit this. We do not invalidate the trace, but instead update the + // instruction depths incrementally. + // NOTE: Only the instruction depths up to MI are accurate. All other + // trace information is not updated. 
MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB); + Traces->verifyAnalysis(); if (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, DelInstrs, - InstrIdxForVirtReg, P) && + InstrIdxForVirtReg, P, + !IncrementalUpdate) && preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs)) { - insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, Traces); + if (MBB->size() > inc_threshold) { + // Use incremental depth updates for basic blocks above treshold + IncrementalUpdate = true; + LastUpdate = BlockIter; + } + + insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr, + RegUnits, IncrementalUpdate); + // Eagerly stop after the first pattern fires. Changed = true; break; @@ -467,6 +533,8 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { } } + if (Changed && IncrementalUpdate) + Traces->invalidate(MBB); return Changed; } @@ -480,7 +548,7 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) { MLI = &getAnalysis<MachineLoopInfo>(); Traces = &getAnalysis<MachineTraceMetrics>(); MinInstr = nullptr; - OptSize = MF.getFunction()->optForSize(); + OptSize = MF.getFunction().optForSize(); DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n'); if (!TII->useMachineCombiner()) { diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp index 7d5a68192e6b..fcec05adc732 100644 --- a/lib/CodeGen/MachineCopyPropagation.cpp +++ b/lib/CodeGen/MachineCopyPropagation.cpp @@ -12,19 +12,26 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" 
#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include <cassert> +#include <iterator> + using namespace llvm; #define DEBUG_TYPE "machine-cp" @@ -32,9 +39,10 @@ using namespace llvm; STATISTIC(NumDeletes, "Number of dead copies deleted"); namespace { - typedef SmallVector<unsigned, 4> RegList; - typedef DenseMap<unsigned, RegList> SourceMap; - typedef DenseMap<unsigned, MachineInstr*> Reg2MIMap; + +using RegList = SmallVector<unsigned, 4>; +using SourceMap = DenseMap<unsigned, RegList>; +using Reg2MIMap = DenseMap<unsigned, MachineInstr *>; class MachineCopyPropagation : public MachineFunctionPass { const TargetRegisterInfo *TRI; @@ -43,6 +51,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid + MachineCopyPropagation() : MachineFunctionPass(ID) { initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry()); } @@ -67,16 +76,23 @@ namespace { /// Candidates for deletion. SmallSetVector<MachineInstr*, 8> MaybeDeadCopies; + /// Def -> available copies map. Reg2MIMap AvailCopyMap; + /// Def -> copies map. Reg2MIMap CopyMap; + /// Src -> Def map SourceMap SrcMap; + bool Changed; }; -} + +} // end anonymous namespace + char MachineCopyPropagation::ID = 0; + char &llvm::MachineCopyPropagationID = MachineCopyPropagation::ID; INITIALIZE_PASS(MachineCopyPropagation, DEBUG_TYPE, @@ -170,6 +186,8 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src, // Check that the existing copy uses the correct sub registers. 
MachineInstr &PrevCopy = *CI->second; + if (PrevCopy.getOperand(0).isDead()) + return false; if (!isNopCopy(PrevCopy, Src, Def, TRI)) return false; @@ -207,19 +225,19 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // The two copies cancel out and the source of the first copy // hasn't been overridden, eliminate the second one. e.g. - // %ECX<def> = COPY %EAX - // ... nothing clobbered EAX. - // %EAX<def> = COPY %ECX + // %ecx = COPY %eax + // ... nothing clobbered eax. + // %eax = COPY %ecx // => - // %ECX<def> = COPY %EAX + // %ecx = COPY %eax // // or // - // %ECX<def> = COPY %EAX - // ... nothing clobbered EAX. - // %ECX<def> = COPY %EAX + // %ecx = COPY %eax + // ... nothing clobbered eax. + // %ecx = COPY %eax // => - // %ECX<def> = COPY %EAX + // %ecx = COPY %eax if (eraseIfRedundant(*MI, Def, Src) || eraseIfRedundant(*MI, Src, Def)) continue; @@ -243,11 +261,11 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // If 'Def' is previously source of another copy, then this earlier copy's // source is no longer available. e.g. - // %xmm9<def> = copy %xmm2 + // %xmm9 = copy %xmm2 // ... - // %xmm2<def> = copy %xmm0 + // %xmm2 = copy %xmm0 // ... - // %xmm2<def> = copy %xmm9 + // %xmm2 = copy %xmm9 ClobberRegister(Def); for (const MachineOperand &MO : MI->implicit_operands()) { if (!MO.isReg() || !MO.isDef()) @@ -269,7 +287,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // it's no longer available for copy propagation. 
RegList &DestList = SrcMap[Src]; if (!is_contained(DestList, Def)) - DestList.push_back(Def); + DestList.push_back(Def); continue; } @@ -360,7 +378,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { } bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; Changed = false; @@ -374,4 +392,3 @@ bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) { return Changed; } - diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp index 845e8232477c..517ac29b6450 100644 --- a/lib/CodeGen/MachineDominators.cpp +++ b/lib/CodeGen/MachineDominators.cpp @@ -26,7 +26,7 @@ static bool VerifyMachineDomInfo = true; static bool VerifyMachineDomInfo = false; #endif static cl::opt<bool, true> VerifyMachineDomInfoX( - "verify-machine-dom-info", cl::location(VerifyMachineDomInfo), + "verify-machine-dom-info", cl::location(VerifyMachineDomInfo), cl::Hidden, cl::desc("Verify machine dominator info (time consuming)")); namespace llvm { @@ -148,7 +148,8 @@ void MachineDominatorTree::verifyDomTree() const { OtherDT.recalculate(F); if (getRootNode()->getBlock() != OtherDT.getRootNode()->getBlock() || DT->compare(OtherDT)) { - errs() << "MachineDominatorTree is not up to date!\nComputed:\n"; + errs() << "MachineDominatorTree for function " << F.getName() + << " is not up to date!\nComputed:\n"; DT->print(errs()); errs() << "\nActual:\n"; OtherDT.print(errs()); diff --git a/lib/CodeGen/MachineFrameInfo.cpp b/lib/CodeGen/MachineFrameInfo.cpp index 73d778ff3023..2aa9d6b816c8 100644 --- a/lib/CodeGen/MachineFrameInfo.cpp +++ b/lib/CodeGen/MachineFrameInfo.cpp @@ -16,12 +16,12 @@ #include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include 
"llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <cassert> #define DEBUG_TYPE "codegen" @@ -47,11 +47,13 @@ static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align, } int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, - bool isSS, const AllocaInst *Alloca) { + bool IsSpillSlot, + const AllocaInst *Alloca, + uint8_t StackID) { assert(Size != 0 && "Cannot allocate zero size stack objects!"); Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); - Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, Alloca, - !isSS)); + Objects.push_back(StackObject(Size, Alignment, 0, false, IsSpillSlot, Alloca, + !IsSpillSlot, StackID)); int Index = (int)Objects.size() - NumFixedObjects - 1; assert(Index >= 0 && "Bad frame index!"); ensureMaxAlignment(Alignment); @@ -77,7 +79,7 @@ int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment, } int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, - bool Immutable, bool isAliased) { + bool IsImmutable, bool IsAliased) { assert(Size != 0 && "Cannot allocate zero size fixed stack objects!"); // The alignment of the frame index can be determined from its offset from // the incoming frame position. If the frame object is at offset 32 and @@ -85,23 +87,24 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, // object is 16-byte aligned. Note that unlike the non-fixed case, if the // stack needs realignment, we can't assume that the stack will in fact be // aligned. - unsigned Align = MinAlign(SPOffset, ForcedRealign ? 
1 : StackAlignment); - Align = clampStackAlignment(!StackRealignable, Align, StackAlignment); - Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, - /*isSS*/ false, - /*Alloca*/ nullptr, isAliased)); + unsigned Alignment = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment); + Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); + Objects.insert(Objects.begin(), + StackObject(Size, Alignment, SPOffset, IsImmutable, + /*isSpillSlot=*/false, /*Alloca=*/nullptr, + IsAliased)); return -++NumFixedObjects; } int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset, - bool Immutable) { - unsigned Align = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment); - Align = clampStackAlignment(!StackRealignable, Align, StackAlignment); - Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, - /*isSS*/ true, - /*Alloca*/ nullptr, - /*isAliased*/ false)); + bool IsImmutable) { + unsigned Alignment = MinAlign(SPOffset, ForcedRealign ? 
1 : StackAlignment); + Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); + Objects.insert(Objects.begin(), + StackObject(Size, Alignment, SPOffset, IsImmutable, + /*IsSpillSlot=*/true, /*Alloca=*/nullptr, + /*IsAliased=*/false)); return -++NumFixedObjects; } @@ -212,6 +215,10 @@ void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{ for (unsigned i = 0, e = Objects.size(); i != e; ++i) { const StackObject &SO = Objects[i]; OS << " fi#" << (int)(i-NumFixedObjects) << ": "; + + if (SO.StackID != 0) + OS << "id=" << SO.StackID << ' '; + if (SO.Size == ~0ULL) { OS << "dead\n"; continue; diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 742b095d955e..bc8eb1429d92 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -1,4 +1,4 @@ -//===-- MachineFunction.cpp -----------------------------------------------===// +//===- MachineFunction.cpp ------------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -14,45 +14,76 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/EHPersonalities.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include 
"llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/WinEHFuncInfo.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSlotTracker.h" -#include "llvm/MC/MCAsmInfo.h" +#include "llvm/IR/Value.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/SectionKind.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/DOTGraphTraits.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <iterator> +#include <string> +#include <utility> +#include <vector> + using namespace llvm; #define DEBUG_TYPE "codegen" static cl::opt<unsigned> - AlignAllFunctions("align-all-functions", - cl::desc("Force the alignment of all functions."), - cl::init(0), cl::Hidden); +AlignAllFunctions("align-all-functions", + cl::desc("Force the alignment of all functions."), + cl::init(0), cl::Hidden); static const char *getPropertyName(MachineFunctionProperties::Property Prop) { - typedef MachineFunctionProperties::Property P; + using P = MachineFunctionProperties::Property; + switch(Prop) { case 
P::FailedISel: return "FailedISel"; case P::IsSSA: return "IsSSA"; @@ -81,23 +112,23 @@ void MachineFunctionProperties::print(raw_ostream &OS) const { //===----------------------------------------------------------------------===// // Out-of-line virtual method. -MachineFunctionInfo::~MachineFunctionInfo() {} +MachineFunctionInfo::~MachineFunctionInfo() = default; void ilist_alloc_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) { MBB->getParent()->DeleteMachineBasicBlock(MBB); } static inline unsigned getFnStackAlignment(const TargetSubtargetInfo *STI, - const Function *Fn) { - if (Fn->hasFnAttribute(Attribute::StackAlignment)) - return Fn->getFnStackAlignment(); + const Function &F) { + if (F.hasFnAttribute(Attribute::StackAlignment)) + return F.getFnStackAlignment(); return STI->getFrameLowering()->getStackAlignment(); } -MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, +MachineFunction::MachineFunction(const Function &F, const TargetMachine &Target, + const TargetSubtargetInfo &STI, unsigned FunctionNum, MachineModuleInfo &mmi) - : Fn(F), Target(TM), STI(TM.getSubtargetImpl(*F)), Ctx(mmi.getContext()), - MMI(mmi) { + : F(F), Target(Target), STI(&STI), Ctx(mmi.getContext()), MMI(mmi) { FunctionNumber = FunctionNum; init(); } @@ -115,21 +146,21 @@ void MachineFunction::init() { // We can realign the stack if the target supports it and the user hasn't // explicitly asked us not to. 
bool CanRealignSP = STI->getFrameLowering()->isStackRealignable() && - !Fn->hasFnAttribute("no-realign-stack"); + !F.hasFnAttribute("no-realign-stack"); FrameInfo = new (Allocator) MachineFrameInfo( - getFnStackAlignment(STI, Fn), /*StackRealignable=*/CanRealignSP, + getFnStackAlignment(STI, F), /*StackRealignable=*/CanRealignSP, /*ForceRealign=*/CanRealignSP && - Fn->hasFnAttribute(Attribute::StackAlignment)); + F.hasFnAttribute(Attribute::StackAlignment)); - if (Fn->hasFnAttribute(Attribute::StackAlignment)) - FrameInfo->ensureMaxAlignment(Fn->getFnStackAlignment()); + if (F.hasFnAttribute(Attribute::StackAlignment)) + FrameInfo->ensureMaxAlignment(F.getFnStackAlignment()); ConstantPool = new (Allocator) MachineConstantPool(getDataLayout()); Alignment = STI->getTargetLowering()->getMinFunctionAlignment(); - // FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn. + // FIXME: Shouldn't use pref alignment if explicit alignment is set on F. // FIXME: Use Function::optForSize(). - if (!Fn->hasFnAttribute(Attribute::OptimizeForSize)) + if (!F.hasFnAttribute(Attribute::OptimizeForSize)) Alignment = std::max(Alignment, STI->getTargetLowering()->getPrefFunctionAlignment()); @@ -139,7 +170,7 @@ void MachineFunction::init() { JumpTableInfo = nullptr; if (isFuncletEHPersonality(classifyEHPersonality( - Fn->hasPersonalityFn() ? Fn->getPersonalityFn() : nullptr))) { + F.hasPersonalityFn() ? F.getPersonalityFn() : nullptr))) { WinEHInfo = new (Allocator) WinEHFuncInfo(); } @@ -147,7 +178,9 @@ void MachineFunction::init() { "Can't create a MachineFunction using a Module with a " "Target-incompatible DataLayout attached\n"); - PSVManager = llvm::make_unique<PseudoSourceValueManager>(); + PSVManager = + llvm::make_unique<PseudoSourceValueManager>(*(getSubtarget(). 
+ getInstrInfo())); } MachineFunction::~MachineFunction() { @@ -166,6 +199,7 @@ void MachineFunction::clear() { InstructionRecycler.clear(Allocator); OperandRecycler.clear(Allocator); BasicBlockRecycler.clear(Allocator); + CodeViewAnnotations.clear(); VariableDbgInfos.clear(); if (RegInfo) { RegInfo->~MachineRegisterInfo(); @@ -194,7 +228,7 @@ void MachineFunction::clear() { } const DataLayout &MachineFunction::getDataLayout() const { - return Fn->getParent()->getDataLayout(); + return F.getParent()->getDataLayout(); } /// Get the JumpTableInfo for this function. @@ -210,7 +244,7 @@ getOrCreateJumpTableInfo(unsigned EntryKind) { /// Should we be emitting segmented stack stuff for the function bool MachineFunction::shouldSplitStack() const { - return getFunction()->hasFnAttribute("split-stack"); + return getFunction().hasFnAttribute("split-stack"); } /// This discards all of the MachineBasicBlock numbers and recomputes them. @@ -270,6 +304,26 @@ MachineFunction::CloneMachineInstr(const MachineInstr *Orig) { MachineInstr(*this, *Orig); } +MachineInstr &MachineFunction::CloneMachineInstrBundle(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) { + MachineInstr *FirstClone = nullptr; + MachineBasicBlock::const_instr_iterator I = Orig.getIterator(); + while (true) { + MachineInstr *Cloned = CloneMachineInstr(&*I); + MBB.insert(InsertBefore, Cloned); + if (FirstClone == nullptr) { + FirstClone = Cloned; + } else { + Cloned->bundleWithPred(); + } + + if (!I->isBundledWithSucc()) + break; + ++I; + } + return *FirstClone; +} + /// Delete the given MachineInstr. 
/// /// This function also serves as the MachineInstr destructor - the real @@ -431,8 +485,7 @@ LLVM_DUMP_METHOD void MachineFunction::dump() const { #endif StringRef MachineFunction::getName() const { - assert(getFunction() && "No function!"); - return getFunction()->getName(); + return getFunction().getName(); } void MachineFunction::print(raw_ostream &OS, const SlotIndexes *Indexes) const { @@ -456,17 +509,17 @@ void MachineFunction::print(raw_ostream &OS, const SlotIndexes *Indexes) const { OS << "Function Live Ins: "; for (MachineRegisterInfo::livein_iterator I = RegInfo->livein_begin(), E = RegInfo->livein_end(); I != E; ++I) { - OS << PrintReg(I->first, TRI); + OS << printReg(I->first, TRI); if (I->second) - OS << " in " << PrintReg(I->second, TRI); + OS << " in " << printReg(I->second, TRI); if (std::next(I) != E) OS << ", "; } OS << '\n'; } - ModuleSlotTracker MST(getFunction()->getParent()); - MST.incorporateFunction(*getFunction()); + ModuleSlotTracker MST(getFunction().getParent()); + MST.incorporateFunction(getFunction()); for (const auto &BB : *this) { OS << '\n'; BB.print(OS, MST, Indexes); @@ -476,10 +529,10 @@ void MachineFunction::print(raw_ostream &OS, const SlotIndexes *Indexes) const { } namespace llvm { + template<> struct DOTGraphTraits<const MachineFunction*> : public DefaultDOTGraphTraits { - - DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} + DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} static std::string getGraphName(const MachineFunction *F) { return ("CFG for '" + F->getName() + "' function").str(); @@ -492,7 +545,7 @@ namespace llvm { raw_string_ostream OSS(OutStr); if (isSimple()) { - OSS << "BB#" << Node->getNumber(); + OSS << printMBBReference(*Node); if (const BasicBlock *BB = Node->getBasicBlock()) OSS << ": " << BB->getName(); } else @@ -510,7 +563,8 @@ namespace llvm { return OutStr; } }; -} + +} // end namespace llvm void MachineFunction::viewCFG() const { @@ -797,7 +851,7 
@@ unsigned MachineJumpTableInfo::getEntryAlignment(const DataLayout &TD) const { // alignment. switch (getEntryKind()) { case MachineJumpTableInfo::EK_BlockAddress: - return TD.getPointerABIAlignment(); + return TD.getPointerABIAlignment(0); case MachineJumpTableInfo::EK_GPRel64BlockAddress: return TD.getABIIntegerTypeAlignment(64); case MachineJumpTableInfo::EK_GPRel32BlockAddress: @@ -851,9 +905,9 @@ void MachineJumpTableInfo::print(raw_ostream &OS) const { OS << "Jump Tables:\n"; for (unsigned i = 0, e = JumpTables.size(); i != e; ++i) { - OS << " jt#" << i << ": "; + OS << printJumpTableEntryReference(i) << ": "; for (unsigned j = 0, f = JumpTables[i].MBBs.size(); j != f; ++j) - OS << " BB#" << JumpTables[i].MBBs[j]->getNumber(); + OS << ' ' << printMBBReference(*JumpTables[i].MBBs[j]); } OS << '\n'; @@ -863,12 +917,15 @@ void MachineJumpTableInfo::print(raw_ostream &OS) const { LLVM_DUMP_METHOD void MachineJumpTableInfo::dump() const { print(dbgs()); } #endif +Printable llvm::printJumpTableEntryReference(unsigned Idx) { + return Printable([Idx](raw_ostream &OS) { OS << "%jump-table." 
<< Idx; }); +} //===----------------------------------------------------------------------===// // MachineConstantPool implementation //===----------------------------------------------------------------------===// -void MachineConstantPoolValue::anchor() { } +void MachineConstantPoolValue::anchor() {} Type *MachineConstantPoolEntry::getType() const { if (isMachineConstantPoolEntry()) diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 535757ed87c1..14655c6eb700 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/Hashing.h" #include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -33,6 +34,9 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" @@ -58,11 +62,8 @@ #include "llvm/Support/LowLevelTypeImpl.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cassert> #include <cstddef> @@ -73,695 +74,6 @@ using namespace llvm; -static cl::opt<bool> PrintWholeRegMask( - "print-whole-regmask", - cl::desc("Print the full contents of regmask operands in IR dumps"), - cl::init(true), cl::Hidden); - -//===----------------------------------------------------------------------===// -// MachineOperand Implementation 
-//===----------------------------------------------------------------------===// - -void MachineOperand::setReg(unsigned Reg) { - if (getReg() == Reg) return; // No change. - - // Otherwise, we have to change the register. If this operand is embedded - // into a machine function, we need to update the old and new register's - // use/def lists. - if (MachineInstr *MI = getParent()) - if (MachineBasicBlock *MBB = MI->getParent()) - if (MachineFunction *MF = MBB->getParent()) { - MachineRegisterInfo &MRI = MF->getRegInfo(); - MRI.removeRegOperandFromUseList(this); - SmallContents.RegNo = Reg; - MRI.addRegOperandToUseList(this); - return; - } - - // Otherwise, just change the register, no problem. :) - SmallContents.RegNo = Reg; -} - -void MachineOperand::substVirtReg(unsigned Reg, unsigned SubIdx, - const TargetRegisterInfo &TRI) { - assert(TargetRegisterInfo::isVirtualRegister(Reg)); - if (SubIdx && getSubReg()) - SubIdx = TRI.composeSubRegIndices(SubIdx, getSubReg()); - setReg(Reg); - if (SubIdx) - setSubReg(SubIdx); -} - -void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) { - assert(TargetRegisterInfo::isPhysicalRegister(Reg)); - if (getSubReg()) { - Reg = TRI.getSubReg(Reg, getSubReg()); - // Note that getSubReg() may return 0 if the sub-register doesn't exist. - // That won't happen in legal code. - setSubReg(0); - if (isDef()) - setIsUndef(false); - } - setReg(Reg); -} - -/// Change a def to a use, or a use to a def. -void MachineOperand::setIsDef(bool Val) { - assert(isReg() && "Wrong MachineOperand accessor"); - assert((!Val || !isDebug()) && "Marking a debug operation as def"); - if (IsDef == Val) - return; - // MRI may keep uses and defs in different list positions. 
- if (MachineInstr *MI = getParent()) - if (MachineBasicBlock *MBB = MI->getParent()) - if (MachineFunction *MF = MBB->getParent()) { - MachineRegisterInfo &MRI = MF->getRegInfo(); - MRI.removeRegOperandFromUseList(this); - IsDef = Val; - MRI.addRegOperandToUseList(this); - return; - } - IsDef = Val; -} - -// If this operand is currently a register operand, and if this is in a -// function, deregister the operand from the register's use/def list. -void MachineOperand::removeRegFromUses() { - if (!isReg() || !isOnRegUseList()) - return; - - if (MachineInstr *MI = getParent()) { - if (MachineBasicBlock *MBB = MI->getParent()) { - if (MachineFunction *MF = MBB->getParent()) - MF->getRegInfo().removeRegOperandFromUseList(this); - } - } -} - -/// ChangeToImmediate - Replace this operand with a new immediate operand of -/// the specified value. If an operand is known to be an immediate already, -/// the setImm method should be used. -void MachineOperand::ChangeToImmediate(int64_t ImmVal) { - assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm"); - - removeRegFromUses(); - - OpKind = MO_Immediate; - Contents.ImmVal = ImmVal; -} - -void MachineOperand::ChangeToFPImmediate(const ConstantFP *FPImm) { - assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm"); - - removeRegFromUses(); - - OpKind = MO_FPImmediate; - Contents.CFP = FPImm; -} - -void MachineOperand::ChangeToES(const char *SymName, unsigned char TargetFlags) { - assert((!isReg() || !isTied()) && - "Cannot change a tied operand into an external symbol"); - - removeRegFromUses(); - - OpKind = MO_ExternalSymbol; - Contents.OffsetedInfo.Val.SymbolName = SymName; - setOffset(0); // Offset is always 0. 
- setTargetFlags(TargetFlags); -} - -void MachineOperand::ChangeToMCSymbol(MCSymbol *Sym) { - assert((!isReg() || !isTied()) && - "Cannot change a tied operand into an MCSymbol"); - - removeRegFromUses(); - - OpKind = MO_MCSymbol; - Contents.Sym = Sym; -} - -void MachineOperand::ChangeToFrameIndex(int Idx) { - assert((!isReg() || !isTied()) && - "Cannot change a tied operand into a FrameIndex"); - - removeRegFromUses(); - - OpKind = MO_FrameIndex; - setIndex(Idx); -} - -/// ChangeToRegister - Replace this operand with a new register operand of -/// the specified value. If an operand is known to be an register already, -/// the setReg method should be used. -void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, - bool isKill, bool isDead, bool isUndef, - bool isDebug) { - MachineRegisterInfo *RegInfo = nullptr; - if (MachineInstr *MI = getParent()) - if (MachineBasicBlock *MBB = MI->getParent()) - if (MachineFunction *MF = MBB->getParent()) - RegInfo = &MF->getRegInfo(); - // If this operand is already a register operand, remove it from the - // register's use/def lists. - bool WasReg = isReg(); - if (RegInfo && WasReg) - RegInfo->removeRegOperandFromUseList(this); - - // Change this to a register and set the reg#. - OpKind = MO_Register; - SmallContents.RegNo = Reg; - SubReg_TargetFlags = 0; - IsDef = isDef; - IsImp = isImp; - IsKill = isKill; - IsDead = isDead; - IsUndef = isUndef; - IsInternalRead = false; - IsEarlyClobber = false; - IsDebug = isDebug; - // Ensure isOnRegUseList() returns false. - Contents.Reg.Prev = nullptr; - // Preserve the tie when the operand was already a register. - if (!WasReg) - TiedTo = 0; - - // If this operand is embedded in a function, add the operand to the - // register's use/def list. - if (RegInfo) - RegInfo->addRegOperandToUseList(this); -} - -/// isIdenticalTo - Return true if this operand is identical to the specified -/// operand. 
Note that this should stay in sync with the hash_value overload -/// below. -bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { - if (getType() != Other.getType() || - getTargetFlags() != Other.getTargetFlags()) - return false; - - switch (getType()) { - case MachineOperand::MO_Register: - return getReg() == Other.getReg() && isDef() == Other.isDef() && - getSubReg() == Other.getSubReg(); - case MachineOperand::MO_Immediate: - return getImm() == Other.getImm(); - case MachineOperand::MO_CImmediate: - return getCImm() == Other.getCImm(); - case MachineOperand::MO_FPImmediate: - return getFPImm() == Other.getFPImm(); - case MachineOperand::MO_MachineBasicBlock: - return getMBB() == Other.getMBB(); - case MachineOperand::MO_FrameIndex: - return getIndex() == Other.getIndex(); - case MachineOperand::MO_ConstantPoolIndex: - case MachineOperand::MO_TargetIndex: - return getIndex() == Other.getIndex() && getOffset() == Other.getOffset(); - case MachineOperand::MO_JumpTableIndex: - return getIndex() == Other.getIndex(); - case MachineOperand::MO_GlobalAddress: - return getGlobal() == Other.getGlobal() && getOffset() == Other.getOffset(); - case MachineOperand::MO_ExternalSymbol: - return strcmp(getSymbolName(), Other.getSymbolName()) == 0 && - getOffset() == Other.getOffset(); - case MachineOperand::MO_BlockAddress: - return getBlockAddress() == Other.getBlockAddress() && - getOffset() == Other.getOffset(); - case MachineOperand::MO_RegisterMask: - case MachineOperand::MO_RegisterLiveOut: { - // Shallow compare of the two RegMasks - const uint32_t *RegMask = getRegMask(); - const uint32_t *OtherRegMask = Other.getRegMask(); - if (RegMask == OtherRegMask) - return true; - - // Calculate the size of the RegMask - const MachineFunction *MF = getParent()->getParent()->getParent(); - const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); - unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32; - - // Deep compare of the two RegMasks - return 
std::equal(RegMask, RegMask + RegMaskSize, OtherRegMask); - } - case MachineOperand::MO_MCSymbol: - return getMCSymbol() == Other.getMCSymbol(); - case MachineOperand::MO_CFIIndex: - return getCFIIndex() == Other.getCFIIndex(); - case MachineOperand::MO_Metadata: - return getMetadata() == Other.getMetadata(); - case MachineOperand::MO_IntrinsicID: - return getIntrinsicID() == Other.getIntrinsicID(); - case MachineOperand::MO_Predicate: - return getPredicate() == Other.getPredicate(); - } - llvm_unreachable("Invalid machine operand type"); -} - -// Note: this must stay exactly in sync with isIdenticalTo above. -hash_code llvm::hash_value(const MachineOperand &MO) { - switch (MO.getType()) { - case MachineOperand::MO_Register: - // Register operands don't have target flags. - return hash_combine(MO.getType(), MO.getReg(), MO.getSubReg(), MO.isDef()); - case MachineOperand::MO_Immediate: - return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getImm()); - case MachineOperand::MO_CImmediate: - return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCImm()); - case MachineOperand::MO_FPImmediate: - return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getFPImm()); - case MachineOperand::MO_MachineBasicBlock: - return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMBB()); - case MachineOperand::MO_FrameIndex: - return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex()); - case MachineOperand::MO_ConstantPoolIndex: - case MachineOperand::MO_TargetIndex: - return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex(), - MO.getOffset()); - case MachineOperand::MO_JumpTableIndex: - return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex()); - case MachineOperand::MO_ExternalSymbol: - return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getOffset(), - MO.getSymbolName()); - case MachineOperand::MO_GlobalAddress: - return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getGlobal(), - MO.getOffset()); - case 
MachineOperand::MO_BlockAddress: - return hash_combine(MO.getType(), MO.getTargetFlags(), - MO.getBlockAddress(), MO.getOffset()); - case MachineOperand::MO_RegisterMask: - case MachineOperand::MO_RegisterLiveOut: - return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask()); - case MachineOperand::MO_Metadata: - return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMetadata()); - case MachineOperand::MO_MCSymbol: - return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMCSymbol()); - case MachineOperand::MO_CFIIndex: - return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCFIIndex()); - case MachineOperand::MO_IntrinsicID: - return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIntrinsicID()); - case MachineOperand::MO_Predicate: - return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getPredicate()); - } - llvm_unreachable("Invalid machine operand type"); -} - -void MachineOperand::print(raw_ostream &OS, const TargetRegisterInfo *TRI, - const TargetIntrinsicInfo *IntrinsicInfo) const { - ModuleSlotTracker DummyMST(nullptr); - print(OS, DummyMST, TRI, IntrinsicInfo); -} - -void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, - const TargetRegisterInfo *TRI, - const TargetIntrinsicInfo *IntrinsicInfo) const { - switch (getType()) { - case MachineOperand::MO_Register: - OS << PrintReg(getReg(), TRI, getSubReg()); - - if (isDef() || isKill() || isDead() || isImplicit() || isUndef() || - isInternalRead() || isEarlyClobber() || isTied()) { - OS << '<'; - bool NeedComma = false; - if (isDef()) { - if (NeedComma) OS << ','; - if (isEarlyClobber()) - OS << "earlyclobber,"; - if (isImplicit()) - OS << "imp-"; - OS << "def"; - NeedComma = true; - // <def,read-undef> only makes sense when getSubReg() is set. - // Don't clutter the output otherwise. 
- if (isUndef() && getSubReg()) - OS << ",read-undef"; - } else if (isImplicit()) { - OS << "imp-use"; - NeedComma = true; - } - - if (isKill()) { - if (NeedComma) OS << ','; - OS << "kill"; - NeedComma = true; - } - if (isDead()) { - if (NeedComma) OS << ','; - OS << "dead"; - NeedComma = true; - } - if (isUndef() && isUse()) { - if (NeedComma) OS << ','; - OS << "undef"; - NeedComma = true; - } - if (isInternalRead()) { - if (NeedComma) OS << ','; - OS << "internal"; - NeedComma = true; - } - if (isTied()) { - if (NeedComma) OS << ','; - OS << "tied"; - if (TiedTo != 15) - OS << unsigned(TiedTo - 1); - } - OS << '>'; - } - break; - case MachineOperand::MO_Immediate: - OS << getImm(); - break; - case MachineOperand::MO_CImmediate: - getCImm()->getValue().print(OS, false); - break; - case MachineOperand::MO_FPImmediate: - if (getFPImm()->getType()->isFloatTy()) { - OS << getFPImm()->getValueAPF().convertToFloat(); - } else if (getFPImm()->getType()->isHalfTy()) { - APFloat APF = getFPImm()->getValueAPF(); - bool Unused; - APF.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &Unused); - OS << "half " << APF.convertToFloat(); - } else if (getFPImm()->getType()->isFP128Ty()) { - APFloat APF = getFPImm()->getValueAPF(); - SmallString<16> Str; - getFPImm()->getValueAPF().toString(Str); - OS << "quad " << Str; - } else if (getFPImm()->getType()->isX86_FP80Ty()) { - APFloat APF = getFPImm()->getValueAPF(); - OS << "x86_fp80 0xK"; - APInt API = APF.bitcastToAPInt(); - OS << format_hex_no_prefix(API.getHiBits(16).getZExtValue(), 4, - /*Upper=*/true); - OS << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16, - /*Upper=*/true); - } else { - OS << getFPImm()->getValueAPF().convertToDouble(); - } - break; - case MachineOperand::MO_MachineBasicBlock: - OS << "<BB#" << getMBB()->getNumber() << ">"; - break; - case MachineOperand::MO_FrameIndex: - OS << "<fi#" << getIndex() << '>'; - break; - case MachineOperand::MO_ConstantPoolIndex: - OS << "<cp#" << 
getIndex(); - if (getOffset()) OS << "+" << getOffset(); - OS << '>'; - break; - case MachineOperand::MO_TargetIndex: - OS << "<ti#" << getIndex(); - if (getOffset()) OS << "+" << getOffset(); - OS << '>'; - break; - case MachineOperand::MO_JumpTableIndex: - OS << "<jt#" << getIndex() << '>'; - break; - case MachineOperand::MO_GlobalAddress: - OS << "<ga:"; - getGlobal()->printAsOperand(OS, /*PrintType=*/false, MST); - if (getOffset()) OS << "+" << getOffset(); - OS << '>'; - break; - case MachineOperand::MO_ExternalSymbol: - OS << "<es:" << getSymbolName(); - if (getOffset()) OS << "+" << getOffset(); - OS << '>'; - break; - case MachineOperand::MO_BlockAddress: - OS << '<'; - getBlockAddress()->printAsOperand(OS, /*PrintType=*/false, MST); - if (getOffset()) OS << "+" << getOffset(); - OS << '>'; - break; - case MachineOperand::MO_RegisterMask: { - unsigned NumRegsInMask = 0; - unsigned NumRegsEmitted = 0; - OS << "<regmask"; - for (unsigned i = 0; i < TRI->getNumRegs(); ++i) { - unsigned MaskWord = i / 32; - unsigned MaskBit = i % 32; - if (getRegMask()[MaskWord] & (1 << MaskBit)) { - if (PrintWholeRegMask || NumRegsEmitted <= 10) { - OS << " " << PrintReg(i, TRI); - NumRegsEmitted++; - } - NumRegsInMask++; - } - } - if (NumRegsEmitted != NumRegsInMask) - OS << " and " << (NumRegsInMask - NumRegsEmitted) << " more..."; - OS << ">"; - break; - } - case MachineOperand::MO_RegisterLiveOut: - OS << "<regliveout>"; - break; - case MachineOperand::MO_Metadata: - OS << '<'; - getMetadata()->printAsOperand(OS, MST); - OS << '>'; - break; - case MachineOperand::MO_MCSymbol: - OS << "<MCSym=" << *getMCSymbol() << '>'; - break; - case MachineOperand::MO_CFIIndex: - OS << "<call frame instruction>"; - break; - case MachineOperand::MO_IntrinsicID: { - Intrinsic::ID ID = getIntrinsicID(); - if (ID < Intrinsic::num_intrinsics) - OS << "<intrinsic:@" << Intrinsic::getName(ID, None) << '>'; - else if (IntrinsicInfo) - OS << "<intrinsic:@" << IntrinsicInfo->getName(ID) << '>'; - 
else - OS << "<intrinsic:" << ID << '>'; - break; - } - case MachineOperand::MO_Predicate: { - auto Pred = static_cast<CmpInst::Predicate>(getPredicate()); - OS << '<' << (CmpInst::isIntPredicate(Pred) ? "intpred" : "floatpred") - << CmpInst::getPredicateName(Pred) << '>'; - break; - } - } - if (unsigned TF = getTargetFlags()) - OS << "[TF=" << TF << ']'; -} - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void MachineOperand::dump() const { - dbgs() << *this << '\n'; -} -#endif - -//===----------------------------------------------------------------------===// -// MachineMemOperand Implementation -//===----------------------------------------------------------------------===// - -/// getAddrSpace - Return the LLVM IR address space number that this pointer -/// points into. -unsigned MachinePointerInfo::getAddrSpace() const { - if (V.isNull() || V.is<const PseudoSourceValue*>()) return 0; - return cast<PointerType>(V.get<const Value*>()->getType())->getAddressSpace(); -} - -/// isDereferenceable - Return true if V is always dereferenceable for -/// Offset + Size byte. -bool MachinePointerInfo::isDereferenceable(unsigned Size, LLVMContext &C, - const DataLayout &DL) const { - if (!V.is<const Value*>()) - return false; - - const Value *BasePtr = V.get<const Value*>(); - if (BasePtr == nullptr) - return false; - - return isDereferenceableAndAlignedPointer( - BasePtr, 1, APInt(DL.getPointerSizeInBits(), Offset + Size), DL); -} - -/// getConstantPool - Return a MachinePointerInfo record that refers to the -/// constant pool. -MachinePointerInfo MachinePointerInfo::getConstantPool(MachineFunction &MF) { - return MachinePointerInfo(MF.getPSVManager().getConstantPool()); -} - -/// getFixedStack - Return a MachinePointerInfo record that refers to the -/// the specified FrameIndex. 
-MachinePointerInfo MachinePointerInfo::getFixedStack(MachineFunction &MF, - int FI, int64_t Offset) { - return MachinePointerInfo(MF.getPSVManager().getFixedStack(FI), Offset); -} - -MachinePointerInfo MachinePointerInfo::getJumpTable(MachineFunction &MF) { - return MachinePointerInfo(MF.getPSVManager().getJumpTable()); -} - -MachinePointerInfo MachinePointerInfo::getGOT(MachineFunction &MF) { - return MachinePointerInfo(MF.getPSVManager().getGOT()); -} - -MachinePointerInfo MachinePointerInfo::getStack(MachineFunction &MF, - int64_t Offset) { - return MachinePointerInfo(MF.getPSVManager().getStack(), Offset); -} - -MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f, - uint64_t s, unsigned int a, - const AAMDNodes &AAInfo, - const MDNode *Ranges, - SyncScope::ID SSID, - AtomicOrdering Ordering, - AtomicOrdering FailureOrdering) - : PtrInfo(ptrinfo), Size(s), FlagVals(f), BaseAlignLog2(Log2_32(a) + 1), - AAInfo(AAInfo), Ranges(Ranges) { - assert((PtrInfo.V.isNull() || PtrInfo.V.is<const PseudoSourceValue*>() || - isa<PointerType>(PtrInfo.V.get<const Value*>()->getType())) && - "invalid pointer value"); - assert(getBaseAlignment() == a && "Alignment is not a power of 2!"); - assert((isLoad() || isStore()) && "Not a load/store!"); - - AtomicInfo.SSID = static_cast<unsigned>(SSID); - assert(getSyncScopeID() == SSID && "Value truncated"); - AtomicInfo.Ordering = static_cast<unsigned>(Ordering); - assert(getOrdering() == Ordering && "Value truncated"); - AtomicInfo.FailureOrdering = static_cast<unsigned>(FailureOrdering); - assert(getFailureOrdering() == FailureOrdering && "Value truncated"); -} - -/// Profile - Gather unique data for the object. 
-/// -void MachineMemOperand::Profile(FoldingSetNodeID &ID) const { - ID.AddInteger(getOffset()); - ID.AddInteger(Size); - ID.AddPointer(getOpaqueValue()); - ID.AddInteger(getFlags()); - ID.AddInteger(getBaseAlignment()); -} - -void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) { - // The Value and Offset may differ due to CSE. But the flags and size - // should be the same. - assert(MMO->getFlags() == getFlags() && "Flags mismatch!"); - assert(MMO->getSize() == getSize() && "Size mismatch!"); - - if (MMO->getBaseAlignment() >= getBaseAlignment()) { - // Update the alignment value. - BaseAlignLog2 = Log2_32(MMO->getBaseAlignment()) + 1; - // Also update the base and offset, because the new alignment may - // not be applicable with the old ones. - PtrInfo = MMO->PtrInfo; - } -} - -/// getAlignment - Return the minimum known alignment in bytes of the -/// actual memory reference. -uint64_t MachineMemOperand::getAlignment() const { - return MinAlign(getBaseAlignment(), getOffset()); -} - -void MachineMemOperand::print(raw_ostream &OS) const { - ModuleSlotTracker DummyMST(nullptr); - print(OS, DummyMST); -} -void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST) const { - assert((isLoad() || isStore()) && - "SV has to be a load, store or both."); - - if (isVolatile()) - OS << "Volatile "; - - if (isLoad()) - OS << "LD"; - if (isStore()) - OS << "ST"; - OS << getSize(); - - // Print the address information. - OS << "["; - if (const Value *V = getValue()) - V->printAsOperand(OS, /*PrintType=*/false, MST); - else if (const PseudoSourceValue *PSV = getPseudoValue()) - PSV->printCustom(OS); - else - OS << "<unknown>"; - - unsigned AS = getAddrSpace(); - if (AS != 0) - OS << "(addrspace=" << AS << ')'; - - // If the alignment of the memory reference itself differs from the alignment - // of the base pointer, print the base alignment explicitly, next to the base - // pointer. 
- if (getBaseAlignment() != getAlignment()) - OS << "(align=" << getBaseAlignment() << ")"; - - if (getOffset() != 0) - OS << "+" << getOffset(); - OS << "]"; - - // Print the alignment of the reference. - if (getBaseAlignment() != getAlignment() || getBaseAlignment() != getSize()) - OS << "(align=" << getAlignment() << ")"; - - // Print TBAA info. - if (const MDNode *TBAAInfo = getAAInfo().TBAA) { - OS << "(tbaa="; - if (TBAAInfo->getNumOperands() > 0) - TBAAInfo->getOperand(0)->printAsOperand(OS, MST); - else - OS << "<unknown>"; - OS << ")"; - } - - // Print AA scope info. - if (const MDNode *ScopeInfo = getAAInfo().Scope) { - OS << "(alias.scope="; - if (ScopeInfo->getNumOperands() > 0) - for (unsigned i = 0, ie = ScopeInfo->getNumOperands(); i != ie; ++i) { - ScopeInfo->getOperand(i)->printAsOperand(OS, MST); - if (i != ie-1) - OS << ","; - } - else - OS << "<unknown>"; - OS << ")"; - } - - // Print AA noalias scope info. - if (const MDNode *NoAliasInfo = getAAInfo().NoAlias) { - OS << "(noalias="; - if (NoAliasInfo->getNumOperands() > 0) - for (unsigned i = 0, ie = NoAliasInfo->getNumOperands(); i != ie; ++i) { - NoAliasInfo->getOperand(i)->printAsOperand(OS, MST); - if (i != ie-1) - OS << ","; - } - else - OS << "<unknown>"; - OS << ")"; - } - - if (isNonTemporal()) - OS << "(nontemporal)"; - if (isDereferenceable()) - OS << "(dereferenceable)"; - if (isInvariant()) - OS << "(invariant)"; - if (getFlags() & MOTargetFlag1) - OS << "(flag1)"; - if (getFlags() & MOTargetFlag2) - OS << "(flag2)"; - if (getFlags() & MOTargetFlag3) - OS << "(flag3)"; -} - -//===----------------------------------------------------------------------===// -// MachineInstr Implementation -//===----------------------------------------------------------------------===// - void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) { if (MCID->ImplicitDefs) for (const MCPhysReg *ImpDefs = MCID->getImplicitDefs(); *ImpDefs; @@ -1034,7 +346,7 @@ MachineInstr::mergeMemRefsWith(const 
MachineInstr& Other) { if (CombinedNumMemRefs != uint8_t(CombinedNumMemRefs)) return std::make_pair(nullptr, 0); - MachineFunction *MF = getParent()->getParent(); + MachineFunction *MF = getMF(); mmo_iterator MemBegin = MF->allocateMemRefsArray(CombinedNumMemRefs); mmo_iterator MemEnd = std::copy(memoperands_begin(), memoperands_end(), MemBegin); @@ -1108,9 +420,9 @@ bool MachineInstr::isIdenticalTo(const MachineInstr &Other, if (Check == IgnoreDefs) continue; else if (Check == IgnoreVRegDefs) { - if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) || - TargetRegisterInfo::isPhysicalRegister(OMO.getReg())) - if (MO.getReg() != OMO.getReg()) + if (!TargetRegisterInfo::isVirtualRegister(MO.getReg()) || + !TargetRegisterInfo::isVirtualRegister(OMO.getReg())) + if (!MO.isIdenticalTo(OMO)) return false; } else { if (!MO.isIdenticalTo(OMO)) @@ -1133,6 +445,10 @@ bool MachineInstr::isIdenticalTo(const MachineInstr &Other, return true; } +const MachineFunction *MachineInstr::getMF() const { + return getParent()->getParent(); +} + MachineInstr *MachineInstr::removeFromParent() { assert(getParent() && "Not embedded in a basic block!"); return getParent()->remove(this); @@ -1282,8 +598,8 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const { assert(getParent() && "Can't have an MBB reference here!"); - assert(getParent()->getParent() && "Can't have an MF reference here!"); - const MachineFunction &MF = *getParent()->getParent(); + assert(getMF() && "Can't have an MF reference here!"); + const MachineFunction &MF = *getMF(); // Most opcodes have fixed constraints in their MCInstrDesc. if (!isInlineAsm()) @@ -1427,7 +743,7 @@ MachineInstr::readsWritesVirtualRegister(unsigned Reg, if (MO.isUse()) Use |= !MO.isUndef(); else if (MO.getSubReg() && !MO.isUndef()) - // A partial <def,undef> doesn't count as reading the register. + // A partial def undef doesn't count as reading the register. 
PartDef = true; else FullDef = true; @@ -1619,7 +935,7 @@ bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const { // Treat volatile loads as stores. This is not strictly necessary for // volatiles, but it is required for atomic loads. It is not allowed to move // a load across an atomic load with Ordering > Monotonic. - if (mayStore() || isCall() || + if (mayStore() || isCall() || isPHI() || (mayLoad() && hasOrderedMemoryRef())) { SawStore = true; return false; @@ -1644,8 +960,9 @@ bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const { bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other, bool UseTBAA) { - const MachineFunction *MF = getParent()->getParent(); + const MachineFunction *MF = getMF(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + const MachineFrameInfo &MFI = MF->getFrameInfo(); // If neither instruction stores to memory, they can't alias in any // meaningful way, even if they read from the same address. @@ -1656,9 +973,6 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other, if (TII->areMemAccessesTriviallyDisjoint(*this, Other, AA)) return false; - if (!AA) - return true; - // FIXME: Need to handle multiple memory operands to support all targets. if (!hasOneMemOperand() || !Other.hasOneMemOperand()) return true; @@ -1666,9 +980,6 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other, MachineMemOperand *MMOa = *memoperands_begin(); MachineMemOperand *MMOb = *Other.memoperands_begin(); - if (!MMOa->getValue() || !MMOb->getValue()) - return true; - // The following interface to AA is fashioned after DAGCombiner::isAlias // and operates with MachineMemOperand offset with some important // assumptions: @@ -1681,22 +992,53 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other, // - There should never be any negative offsets here. 
// // FIXME: Modify API to hide this math from "user" - // FIXME: Even before we go to AA we can reason locally about some + // Even before we go to AA we can reason locally about some // memory objects. It can save compile time, and possibly catch some // corner cases not currently covered. - assert((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset"); - assert((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset"); + int64_t OffsetA = MMOa->getOffset(); + int64_t OffsetB = MMOb->getOffset(); + + int64_t MinOffset = std::min(OffsetA, OffsetB); + int64_t WidthA = MMOa->getSize(); + int64_t WidthB = MMOb->getSize(); + const Value *ValA = MMOa->getValue(); + const Value *ValB = MMOb->getValue(); + bool SameVal = (ValA && ValB && (ValA == ValB)); + if (!SameVal) { + const PseudoSourceValue *PSVa = MMOa->getPseudoValue(); + const PseudoSourceValue *PSVb = MMOb->getPseudoValue(); + if (PSVa && ValB && !PSVa->mayAlias(&MFI)) + return false; + if (PSVb && ValA && !PSVb->mayAlias(&MFI)) + return false; + if (PSVa && PSVb && (PSVa == PSVb)) + SameVal = true; + } - int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset()); - int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset; - int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset; + if (SameVal) { + int64_t MaxOffset = std::max(OffsetA, OffsetB); + int64_t LowWidth = (MinOffset == OffsetA) ? WidthA : WidthB; + return (MinOffset + LowWidth > MaxOffset); + } + + if (!AA) + return true; + + if (!ValA || !ValB) + return true; - AliasResult AAResult = - AA->alias(MemoryLocation(MMOa->getValue(), Overlapa, - UseTBAA ? MMOa->getAAInfo() : AAMDNodes()), - MemoryLocation(MMOb->getValue(), Overlapb, - UseTBAA ? 
MMOb->getAAInfo() : AAMDNodes())); + assert((OffsetA >= 0) && "Negative MachineMemOperand offset"); + assert((OffsetB >= 0) && "Negative MachineMemOperand offset"); + + int64_t Overlapa = WidthA + OffsetA - MinOffset; + int64_t Overlapb = WidthB + OffsetB - MinOffset; + + AliasResult AAResult = AA->alias( + MemoryLocation(ValA, Overlapa, + UseTBAA ? MMOa->getAAInfo() : AAMDNodes()), + MemoryLocation(ValB, Overlapb, + UseTBAA ? MMOb->getAAInfo() : AAMDNodes())); return (AAResult != NoAlias); } @@ -1822,6 +1164,41 @@ void MachineInstr::copyImplicitOps(MachineFunction &MF, } } +bool MachineInstr::hasComplexRegisterTies() const { + const MCInstrDesc &MCID = getDesc(); + for (unsigned I = 0, E = getNumOperands(); I < E; ++I) { + const auto &Operand = getOperand(I); + if (!Operand.isReg() || Operand.isDef()) + // Ignore the defined registers as MCID marks only the uses as tied. + continue; + int ExpectedTiedIdx = MCID.getOperandConstraint(I, MCOI::TIED_TO); + int TiedIdx = Operand.isTied() ? int(findTiedOperandIdx(I)) : -1; + if (ExpectedTiedIdx != TiedIdx) + return true; + } + return false; +} + +LLT MachineInstr::getTypeToPrint(unsigned OpIdx, SmallBitVector &PrintedTypes, + const MachineRegisterInfo &MRI) const { + const MachineOperand &Op = getOperand(OpIdx); + if (!Op.isReg()) + return LLT{}; + + if (isVariadic() || OpIdx >= getNumExplicitOperands()) + return MRI.getType(Op.getReg()); + + auto &OpInfo = getDesc().OpInfo[OpIdx]; + if (!OpInfo.isGenericType()) + return MRI.getType(Op.getReg()); + + if (PrintedTypes[OpInfo.getGenericTypeIndex()]) + return LLT{}; + + PrintedTypes.set(OpInfo.getGenericTypeIndex()); + return MRI.getType(Op.getReg()); +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void MachineInstr::dump() const { dbgs() << " "; @@ -1834,7 +1211,7 @@ void MachineInstr::print(raw_ostream &OS, bool SkipOpers, bool SkipDebugLoc, const Module *M = nullptr; if (const MachineBasicBlock *MBB = getParent()) if (const MachineFunction *MF = 
MBB->getParent()) - M = MF->getFunction()->getParent(); + M = MF->getFunction().getParent(); ModuleSlotTracker MST(M); print(OS, MST, SkipOpers, SkipDebugLoc, TII); @@ -1863,21 +1240,31 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, // Save a list of virtual registers. SmallVector<unsigned, 8> VirtRegs; + SmallBitVector PrintedTypes(8); + bool ShouldPrintRegisterTies = hasComplexRegisterTies(); + auto getTiedOperandIdx = [&](unsigned OpIdx) { + if (!ShouldPrintRegisterTies) + return 0U; + const MachineOperand &MO = getOperand(OpIdx); + if (MO.isReg() && MO.isTied() && !MO.isDef()) + return findTiedOperandIdx(OpIdx); + return 0U; + }; // Print explicitly defined operands on the left of an assignment syntax. unsigned StartOp = 0, e = getNumOperands(); for (; StartOp < e && getOperand(StartOp).isReg() && - getOperand(StartOp).isDef() && - !getOperand(StartOp).isImplicit(); + getOperand(StartOp).isDef() && !getOperand(StartOp).isImplicit(); ++StartOp) { - if (StartOp != 0) OS << ", "; - getOperand(StartOp).print(OS, MST, TRI, IntrinsicInfo); + if (StartOp != 0) + OS << ", "; + LLT TypeToPrint = MRI ? getTypeToPrint(StartOp, PrintedTypes, *MRI) : LLT{}; + unsigned TiedOperandIdx = getTiedOperandIdx(StartOp); + getOperand(StartOp).print(OS, MST, TypeToPrint, /*PrintDef=*/false, + ShouldPrintRegisterTies, TiedOperandIdx, TRI, + IntrinsicInfo); unsigned Reg = getOperand(StartOp).getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (TargetRegisterInfo::isVirtualRegister(Reg)) VirtRegs.push_back(Reg); - LLT Ty = MRI ? MRI->getType(Reg) : LLT{}; - if (Ty.isValid()) - OS << '(' << Ty << ')'; - } } if (StartOp != 0) @@ -1900,7 +1287,12 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, if (isInlineAsm() && e >= InlineAsm::MIOp_FirstOperand) { // Print asm string. OS << " "; - getOperand(InlineAsm::MIOp_AsmString).print(OS, MST, TRI); + const unsigned OpIdx = InlineAsm::MIOp_AsmString; + LLT TypeToPrint = MRI ? 
getTypeToPrint(OpIdx, PrintedTypes, *MRI) : LLT{}; + unsigned TiedOperandIdx = getTiedOperandIdx(OpIdx); + getOperand(OpIdx).print(OS, MST, TypeToPrint, /*PrintDef=*/true, + ShouldPrintRegisterTies, TiedOperandIdx, TRI, + IntrinsicInfo); // Print HasSideEffects, MayLoad, MayStore, IsAlignStack unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); @@ -1943,8 +1335,12 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, auto *DIV = dyn_cast<DILocalVariable>(MO.getMetadata()); if (DIV && !DIV->getName().empty()) OS << "!\"" << DIV->getName() << '\"'; - else - MO.print(OS, MST, TRI); + else { + LLT TypeToPrint = MRI ? getTypeToPrint(i, PrintedTypes, *MRI) : LLT{}; + unsigned TiedOperandIdx = getTiedOperandIdx(i); + MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true, + ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo); + } } else if (TRI && (isInsertSubreg() || isRegSequence() || (isSubregToReg() && i == 3)) && MO.isImm()) { OS << TRI->getSubRegIndexName(MO.getImm()); @@ -2006,8 +1402,15 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, // Compute the index of the next operand descriptor. AsmDescOp += 1 + InlineAsm::getNumOperandRegisters(Flag); - } else - MO.print(OS, MST, TRI); + } else { + LLT TypeToPrint = MRI ? 
getTypeToPrint(i, PrintedTypes, *MRI) : LLT{}; + unsigned TiedOperandIdx = getTiedOperandIdx(i); + if (MO.isImm() && isOperandSubregIdx(i)) + MachineOperand::printSubregIdx(OS, MO.getImm(), TRI); + else + MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true, + ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo); + } } bool HaveSemi = false; @@ -2057,14 +1460,14 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, else OS << " " << TRI->getRegClassName(RC.get<const TargetRegisterClass *>()); - OS << ':' << PrintReg(VirtRegs[i]); + OS << ':' << printReg(VirtRegs[i]); for (unsigned j = i+1; j != VirtRegs.size();) { if (MRI->getRegClassOrRegBank(VirtRegs[j]) != RC) { ++j; continue; } if (VirtRegs[i] != VirtRegs[j]) - OS << "," << PrintReg(VirtRegs[j]); + OS << "," << printReg(VirtRegs[j]); VirtRegs.erase(VirtRegs.begin()+j); } } @@ -2328,8 +1731,8 @@ void MachineInstr::emitError(StringRef Msg) const { MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID, bool IsIndirect, - unsigned Reg, unsigned Offset, - const MDNode *Variable, const MDNode *Expr) { + unsigned Reg, const MDNode *Variable, + const MDNode *Expr) { assert(isa<DILocalVariable>(Variable) && "not a variable"); assert(cast<DIExpression>(Expr)->isValid() && "not an expression"); assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) && @@ -2337,53 +1740,60 @@ MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL, if (IsIndirect) return BuildMI(MF, DL, MCID) .addReg(Reg, RegState::Debug) - .addImm(Offset) + .addImm(0U) .addMetadata(Variable) .addMetadata(Expr); - else { - assert(Offset == 0 && "A direct address cannot have an offset."); + else return BuildMI(MF, DL, MCID) .addReg(Reg, RegState::Debug) .addReg(0U, RegState::Debug) .addMetadata(Variable) .addMetadata(Expr); - } } MachineInstrBuilder llvm::BuildMI(MachineBasicBlock &BB, MachineBasicBlock::iterator I, const DebugLoc &DL, const MCInstrDesc &MCID, 
bool IsIndirect, unsigned Reg, - unsigned Offset, const MDNode *Variable, - const MDNode *Expr) { + const MDNode *Variable, const MDNode *Expr) { assert(isa<DILocalVariable>(Variable) && "not a variable"); assert(cast<DIExpression>(Expr)->isValid() && "not an expression"); MachineFunction &MF = *BB.getParent(); - MachineInstr *MI = - BuildMI(MF, DL, MCID, IsIndirect, Reg, Offset, Variable, Expr); + MachineInstr *MI = BuildMI(MF, DL, MCID, IsIndirect, Reg, Variable, Expr); BB.insert(I, MI); return MachineInstrBuilder(MF, MI); } +/// Compute the new DIExpression to use with a DBG_VALUE for a spill slot. +/// This prepends DW_OP_deref when spilling an indirect DBG_VALUE. +static const DIExpression *computeExprForSpill(const MachineInstr &MI) { + assert(MI.getOperand(0).isReg() && "can't spill non-register"); + assert(MI.getDebugVariable()->isValidLocationForIntrinsic(MI.getDebugLoc()) && + "Expected inlined-at fields to agree"); + + const DIExpression *Expr = MI.getDebugExpression(); + if (MI.isIndirectDebugValue()) { + assert(MI.getOperand(1).getImm() == 0 && "DBG_VALUE with nonzero offset"); + Expr = DIExpression::prepend(Expr, DIExpression::WithDeref); + } + return Expr; +} + MachineInstr *llvm::buildDbgValueForSpill(MachineBasicBlock &BB, MachineBasicBlock::iterator I, const MachineInstr &Orig, int FrameIndex) { - const MDNode *Var = Orig.getDebugVariable(); - const auto *Expr = cast_or_null<DIExpression>(Orig.getDebugExpression()); - bool IsIndirect = Orig.isIndirectDebugValue(); - uint64_t Offset = IsIndirect ? Orig.getOperand(1).getImm() : 0; - DebugLoc DL = Orig.getDebugLoc(); - assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && - "Expected inlined-at fields to agree"); - // If the DBG_VALUE already was a memory location, add an extra - // DW_OP_deref. Otherwise just turning this from a register into a - // memory/indirect location is sufficient. 
- if (IsIndirect) - Expr = DIExpression::prepend(Expr, DIExpression::WithDeref); - return BuildMI(BB, I, DL, Orig.getDesc()) + const DIExpression *Expr = computeExprForSpill(Orig); + return BuildMI(BB, I, Orig.getDebugLoc(), Orig.getDesc()) .addFrameIndex(FrameIndex) - .addImm(Offset) - .addMetadata(Var) + .addImm(0U) + .addMetadata(Orig.getDebugVariable()) .addMetadata(Expr); } + +void llvm::updateDbgValueForSpill(MachineInstr &Orig, int FrameIndex) { + const DIExpression *Expr = computeExprForSpill(Orig); + Orig.getOperand(0).ChangeToFrameIndex(FrameIndex); + Orig.getOperand(1).ChangeToImmediate(0U); + Orig.getOperand(3).setMetadata(Expr); +} diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp index b5621a09c6ff..ed16a2b6084c 100644 --- a/lib/CodeGen/MachineInstrBundle.cpp +++ b/lib/CodeGen/MachineInstrBundle.cpp @@ -13,10 +13,10 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <utility> using namespace llvm; diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index c7113f1fdc47..75d449c7ac6f 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -1,4 +1,4 @@ -//===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===// +//===- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ----------===// // // The LLVM Compiler Infrastructure // @@ -16,26 +16,42 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" +#include 
"llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> +#include <cassert> +#include <limits> +#include <vector> + using namespace llvm; #define DEBUG_TYPE "machinelicm" @@ -68,6 +84,7 @@ STATISTIC(NumPostRAHoisted, "Number of machine instructions hoisted out of loops post regalloc"); namespace { + class MachineLICM : public MachineFunctionPass { const TargetInstrInfo *TII; const TargetLoweringBase *TLI; @@ -75,7 +92,7 @@ namespace { const MachineFrameInfo *MFI; MachineRegisterInfo *MRI; TargetSchedModel SchedModel; - bool PreRegAlloc; + bool PreRegAlloc = true; // Various analyses that we use... AliasAnalysis *AA; // Alias analysis info. 
@@ -89,7 +106,7 @@ namespace { MachineBasicBlock *CurPreheader; // The preheader for CurLoop. // Exit blocks for CurLoop. - SmallVector<MachineBasicBlock*, 8> ExitBlocks; + SmallVector<MachineBasicBlock *, 8> ExitBlocks; bool isExitBlock(const MachineBasicBlock *MBB) const { return is_contained(ExitBlocks, MBB); @@ -107,7 +124,7 @@ namespace { SmallVector<SmallVector<unsigned, 8>, 16> BackTrace; // For each opcode, keep a list of potential CSE instructions. - DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap; + DenseMap<unsigned, std::vector<const MachineInstr *>> CSEMap; enum { SpeculateFalse = 0, @@ -122,15 +139,15 @@ namespace { public: static char ID; // Pass identification, replacement for typeid - MachineLICM() : - MachineFunctionPass(ID), PreRegAlloc(true) { - initializeMachineLICMPass(*PassRegistry::getPassRegistry()); - } - explicit MachineLICM(bool PreRA) : - MachineFunctionPass(ID), PreRegAlloc(PreRA) { + MachineLICM() : MachineFunctionPass(ID) { + initializeMachineLICMPass(*PassRegistry::getPassRegistry()); + } + + explicit MachineLICM(bool PreRA) + : MachineFunctionPass(ID), PreRegAlloc(PreRA) { initializeMachineLICMPass(*PassRegistry::getPassRegistry()); - } + } bool runOnMachineFunction(MachineFunction &MF) override; @@ -157,6 +174,7 @@ namespace { MachineInstr *MI; unsigned Def; int FI; + CandidateInfo(MachineInstr *mi, unsigned def, int fi) : MI(mi), Def(def), FI(fi) {} }; @@ -233,10 +251,13 @@ namespace { MachineBasicBlock *getCurPreheader(); }; + } // end anonymous namespace char MachineLICM::ID = 0; + char &llvm::MachineLICMID = MachineLICM::ID; + INITIALIZE_PASS_BEGIN(MachineLICM, DEBUG_TYPE, "Machine Loop Invariant Code Motion", false, false) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) @@ -259,7 +280,7 @@ static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) { } bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; Changed = 
FirstInLoop = false; @@ -425,7 +446,7 @@ void MachineLICM::ProcessMI(MachineInstr *MI, // Only consider reloads for now and remats which do not have register // operands. FIXME: Consider unfold load folding instructions. if (Def && !RuledOut) { - int FI = INT_MIN; + int FI = std::numeric_limits<int>::min(); if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) || (TII->isLoadFromStackSlot(*MI, FI) && MFI->isSpillSlotObjectIndex(FI))) Candidates.push_back(CandidateInfo(MI, Def, FI)); @@ -492,7 +513,7 @@ void MachineLICM::HoistRegionPostRA() { // registers read by the terminator. Similarly its def should not be // clobbered by the terminator. for (CandidateInfo &Candidate : Candidates) { - if (Candidate.FI != INT_MIN && + if (Candidate.FI != std::numeric_limits<int>::min() && StoredFIs.count(Candidate.FI)) continue; @@ -542,8 +563,8 @@ void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { // Now move the instructions to the predecessor, inserting it before any // terminator instructions. - DEBUG(dbgs() << "Hoisting to BB#" << Preheader->getNumber() << " from BB#" - << MI->getParent()->getNumber() << ": " << *MI); + DEBUG(dbgs() << "Hoisting to " << printMBBReference(*Preheader) << " from " + << printMBBReference(*MI->getParent()) << ": " << *MI); // Splice the instruction to the preheader. MachineBasicBlock *MBB = MI->getParent(); @@ -580,14 +601,14 @@ bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) { } void MachineLICM::EnterScope(MachineBasicBlock *MBB) { - DEBUG(dbgs() << "Entering BB#" << MBB->getNumber() << '\n'); + DEBUG(dbgs() << "Entering " << printMBBReference(*MBB) << '\n'); // Remember livein register pressure. 
BackTrace.push_back(RegPressure); } void MachineLICM::ExitScope(MachineBasicBlock *MBB) { - DEBUG(dbgs() << "Exiting BB#" << MBB->getNumber() << '\n'); + DEBUG(dbgs() << "Exiting " << printMBBReference(*MBB) << '\n'); BackTrace.pop_back(); } @@ -617,7 +638,6 @@ void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node, /// specified header block, and that are in the current loop) in depth first /// order w.r.t the DominatorTree. This allows us to visit definitions before /// uses, allowing us to hoist a loop body in one pass without iteration. -/// void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { MachineBasicBlock *Preheader = getCurPreheader(); if (!Preheader) @@ -836,7 +856,7 @@ MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen, /// Return true if this machine instruction loads from global offset table or /// constant pool. static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) { - assert (MI.mayLoad() && "Expected MI that loads!"); + assert(MI.mayLoad() && "Expected MI that loads!"); // If we lost memory operands, conservatively assume that the instruction // reads from everything.. @@ -876,7 +896,6 @@ bool MachineLICM::IsLICMCandidate(MachineInstr &I) { /// I.e., all virtual register operands are defined outside of the loop, /// physical registers aren't accessed explicitly, and there are no side /// effects that aren't captured by the operands or other flags. -/// bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { if (!IsLICMCandidate(I)) return false; @@ -898,8 +917,8 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { // However, if the physreg is known to always be caller saved/restored // then this use is safe to hoist. if (!MRI->isConstantPhysReg(Reg) && - !(TRI->isCallerPreservedPhysReg(Reg, *I.getParent()->getParent()))) - return false; + !(TRI->isCallerPreservedPhysReg(Reg, *I.getMF()))) + return false; // Otherwise it's safe to move. 
continue; } else if (!MO.isDead()) { @@ -928,7 +947,6 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { return true; } - /// Return true if the specified instruction is used by a phi node and hoisting /// it could cause a copy to be inserted. bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const { @@ -1173,7 +1191,7 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { &LoadRegIndex); if (NewOpc == 0) return nullptr; const MCInstrDesc &MID = TII->get(NewOpc); - MachineFunction &MF = *MI->getParent()->getParent(); + MachineFunction &MF = *MI->getMF(); const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF); // Ok, we're unfolding. Create a temporary register and do the unfold. unsigned Reg = MRI->createVirtualRegister(RC); @@ -1233,7 +1251,7 @@ MachineLICM::LookForDuplicate(const MachineInstr *MI, /// the existing instruction rather than hoisting the instruction to the /// preheader. bool MachineLICM::EliminateCSE(MachineInstr *MI, - DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) { + DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator &CI) { // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate // the undef property onto uses. if (CI == CSEMap.end() || MI->isImplicitDef()) @@ -1292,7 +1310,7 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI, /// the loop. bool MachineLICM::MayCSE(MachineInstr *MI) { unsigned Opcode = MI->getOpcode(); - DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator + DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator CI = CSEMap.find(Opcode); // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate // the undef property onto uses. 
@@ -1318,9 +1336,9 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { DEBUG({ dbgs() << "Hoisting " << *MI; if (MI->getParent()->getBasicBlock()) - dbgs() << " from BB#" << MI->getParent()->getNumber(); + dbgs() << " from " << printMBBReference(*MI->getParent()); if (Preheader->getBasicBlock()) - dbgs() << " to BB#" << Preheader->getNumber(); + dbgs() << " to " << printMBBReference(*Preheader); dbgs() << "\n"; }); @@ -1333,7 +1351,7 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { // Look for opportunity to CSE the hoisted instruction. unsigned Opcode = MI->getOpcode(); - DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator + DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator CI = CSEMap.find(Opcode); if (!EliminateCSE(MI, CI)) { // Otherwise, splice the instruction to the preheader. diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index 825290a438a6..8f0b89657d02 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/TinyPtrVector.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetLoweringObjectFile.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instructions.h" @@ -26,7 +27,6 @@ #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include <algorithm> #include <cassert> @@ -207,9 +207,9 @@ bool MachineModuleInfo::doInitialization(Module &M) { ObjFileMMI = nullptr; CurCallSite = 0; DbgInfoAvailable = UsesVAFloatArgument = UsesMorestackAddr = false; + HasSplitStack = HasNosplitStack = false; AddrLabelSymbols = nullptr; TheModule = &M; - return false; } @@ -276,7 +276,8 @@ MachineModuleInfo::getOrCreateMachineFunction(const Function &F) { MachineFunction *MF; if 
(I.second) { // No pre-existing machine function, create a new one. - MF = new MachineFunction(&F, TM, NextFnNum++, *this); + const TargetSubtargetInfo &STI = *TM.getSubtargetImpl(F); + MF = new MachineFunction(F, TM, STI, NextFnNum++, *this); // Update the set entry. I.first->second.reset(MF); } else { diff --git a/lib/CodeGen/MachineModuleInfoImpls.cpp b/lib/CodeGen/MachineModuleInfoImpls.cpp index 22d519e5d88f..07b173bc94f8 100644 --- a/lib/CodeGen/MachineModuleInfoImpls.cpp +++ b/lib/CodeGen/MachineModuleInfoImpls.cpp @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/MachineModuleInfoImpls.cpp ---------------------------===// +//===- llvm/CodeGen/MachineModuleInfoImpls.cpp ----------------------------===// // // The LLVM Compiler Infrastructure // @@ -13,7 +13,9 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/MC/MCSymbol.h" + using namespace llvm; //===----------------------------------------------------------------------===// @@ -24,21 +26,17 @@ using namespace llvm; void MachineModuleInfoMachO::anchor() {} void MachineModuleInfoELF::anchor() {} -static int SortSymbolPair(const void *LHS, const void *RHS) { - typedef std::pair<MCSymbol*, MachineModuleInfoImpl::StubValueTy> PairTy; - const MCSymbol *LHSS = ((const PairTy *)LHS)->first; - const MCSymbol *RHSS = ((const PairTy *)RHS)->first; - return LHSS->getName().compare(RHSS->getName()); +using PairTy = std::pair<MCSymbol *, MachineModuleInfoImpl::StubValueTy>; +static int SortSymbolPair(const PairTy *LHS, const PairTy *RHS) { + return LHS->first->getName().compare(RHS->first->getName()); } MachineModuleInfoImpl::SymbolListTy MachineModuleInfoImpl::getSortedStubs( DenseMap<MCSymbol *, MachineModuleInfoImpl::StubValueTy> &Map) { MachineModuleInfoImpl::SymbolListTy List(Map.begin(), Map.end()); - if (!List.empty()) - qsort(&List[0], List.size(), sizeof(List[0]), SortSymbolPair); + 
array_pod_sort(List.begin(), List.end(), SortSymbolPair); Map.clear(); return List; } - diff --git a/lib/CodeGen/MachineOperand.cpp b/lib/CodeGen/MachineOperand.cpp new file mode 100644 index 000000000000..d17c481862a1 --- /dev/null +++ b/lib/CodeGen/MachineOperand.cpp @@ -0,0 +1,936 @@ +//===- lib/CodeGen/MachineOperand.cpp -------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file Methods common to all machine operands. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/CodeGen/MIRPrinter.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/ModuleSlotTracker.h" +#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +static cl::opt<int> + PrintRegMaskNumRegs("print-regmask-num-regs", + cl::desc("Number of registers to limit to when " + "printing regmask operands in IR dumps. 
" + "unlimited = -1"), + cl::init(32), cl::Hidden); + +static const MachineFunction *getMFIfAvailable(const MachineOperand &MO) { + if (const MachineInstr *MI = MO.getParent()) + if (const MachineBasicBlock *MBB = MI->getParent()) + if (const MachineFunction *MF = MBB->getParent()) + return MF; + return nullptr; +} +static MachineFunction *getMFIfAvailable(MachineOperand &MO) { + return const_cast<MachineFunction *>( + getMFIfAvailable(const_cast<const MachineOperand &>(MO))); +} + +void MachineOperand::setReg(unsigned Reg) { + if (getReg() == Reg) + return; // No change. + + // Otherwise, we have to change the register. If this operand is embedded + // into a machine function, we need to update the old and new register's + // use/def lists. + if (MachineFunction *MF = getMFIfAvailable(*this)) { + MachineRegisterInfo &MRI = MF->getRegInfo(); + MRI.removeRegOperandFromUseList(this); + SmallContents.RegNo = Reg; + MRI.addRegOperandToUseList(this); + return; + } + + // Otherwise, just change the register, no problem. :) + SmallContents.RegNo = Reg; +} + +void MachineOperand::substVirtReg(unsigned Reg, unsigned SubIdx, + const TargetRegisterInfo &TRI) { + assert(TargetRegisterInfo::isVirtualRegister(Reg)); + if (SubIdx && getSubReg()) + SubIdx = TRI.composeSubRegIndices(SubIdx, getSubReg()); + setReg(Reg); + if (SubIdx) + setSubReg(SubIdx); +} + +void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) { + assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + if (getSubReg()) { + Reg = TRI.getSubReg(Reg, getSubReg()); + // Note that getSubReg() may return 0 if the sub-register doesn't exist. + // That won't happen in legal code. + setSubReg(0); + if (isDef()) + setIsUndef(false); + } + setReg(Reg); +} + +/// Change a def to a use, or a use to a def. 
+void MachineOperand::setIsDef(bool Val) { + assert(isReg() && "Wrong MachineOperand accessor"); + assert((!Val || !isDebug()) && "Marking a debug operation as def"); + if (IsDef == Val) + return; + assert(!IsDeadOrKill && "Changing def/use with dead/kill set not supported"); + // MRI may keep uses and defs in different list positions. + if (MachineFunction *MF = getMFIfAvailable(*this)) { + MachineRegisterInfo &MRI = MF->getRegInfo(); + MRI.removeRegOperandFromUseList(this); + IsDef = Val; + MRI.addRegOperandToUseList(this); + return; + } + IsDef = Val; +} + +bool MachineOperand::isRenamable() const { + assert(isReg() && "Wrong MachineOperand accessor"); + assert(TargetRegisterInfo::isPhysicalRegister(getReg()) && + "isRenamable should only be checked on physical registers"); + return IsRenamable; +} + +void MachineOperand::setIsRenamable(bool Val) { + assert(isReg() && "Wrong MachineOperand accessor"); + assert(TargetRegisterInfo::isPhysicalRegister(getReg()) && + "setIsRenamable should only be called on physical registers"); + if (const MachineInstr *MI = getParent()) + if ((isDef() && MI->hasExtraDefRegAllocReq()) || + (isUse() && MI->hasExtraSrcRegAllocReq())) + assert(!Val && "isRenamable should be false for " + "hasExtraDefRegAllocReq/hasExtraSrcRegAllocReq opcodes"); + IsRenamable = Val; +} + +void MachineOperand::setIsRenamableIfNoExtraRegAllocReq() { + if (const MachineInstr *MI = getParent()) + if ((isDef() && MI->hasExtraDefRegAllocReq()) || + (isUse() && MI->hasExtraSrcRegAllocReq())) + return; + + setIsRenamable(true); +} + +// If this operand is currently a register operand, and if this is in a +// function, deregister the operand from the register's use/def list. 
+void MachineOperand::removeRegFromUses() { + if (!isReg() || !isOnRegUseList()) + return; + + if (MachineFunction *MF = getMFIfAvailable(*this)) + MF->getRegInfo().removeRegOperandFromUseList(this); +} + +/// ChangeToImmediate - Replace this operand with a new immediate operand of +/// the specified value. If an operand is known to be an immediate already, +/// the setImm method should be used. +void MachineOperand::ChangeToImmediate(int64_t ImmVal) { + assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm"); + + removeRegFromUses(); + + OpKind = MO_Immediate; + Contents.ImmVal = ImmVal; +} + +void MachineOperand::ChangeToFPImmediate(const ConstantFP *FPImm) { + assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm"); + + removeRegFromUses(); + + OpKind = MO_FPImmediate; + Contents.CFP = FPImm; +} + +void MachineOperand::ChangeToES(const char *SymName, + unsigned char TargetFlags) { + assert((!isReg() || !isTied()) && + "Cannot change a tied operand into an external symbol"); + + removeRegFromUses(); + + OpKind = MO_ExternalSymbol; + Contents.OffsetedInfo.Val.SymbolName = SymName; + setOffset(0); // Offset is always 0. 
+ setTargetFlags(TargetFlags); +} + +void MachineOperand::ChangeToMCSymbol(MCSymbol *Sym) { + assert((!isReg() || !isTied()) && + "Cannot change a tied operand into an MCSymbol"); + + removeRegFromUses(); + + OpKind = MO_MCSymbol; + Contents.Sym = Sym; +} + +void MachineOperand::ChangeToFrameIndex(int Idx) { + assert((!isReg() || !isTied()) && + "Cannot change a tied operand into a FrameIndex"); + + removeRegFromUses(); + + OpKind = MO_FrameIndex; + setIndex(Idx); +} + +void MachineOperand::ChangeToTargetIndex(unsigned Idx, int64_t Offset, + unsigned char TargetFlags) { + assert((!isReg() || !isTied()) && + "Cannot change a tied operand into a FrameIndex"); + + removeRegFromUses(); + + OpKind = MO_TargetIndex; + setIndex(Idx); + setOffset(Offset); + setTargetFlags(TargetFlags); +} + +/// ChangeToRegister - Replace this operand with a new register operand of +/// the specified value. If an operand is known to be an register already, +/// the setReg method should be used. +void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, + bool isKill, bool isDead, bool isUndef, + bool isDebug) { + MachineRegisterInfo *RegInfo = nullptr; + if (MachineFunction *MF = getMFIfAvailable(*this)) + RegInfo = &MF->getRegInfo(); + // If this operand is already a register operand, remove it from the + // register's use/def lists. + bool WasReg = isReg(); + if (RegInfo && WasReg) + RegInfo->removeRegOperandFromUseList(this); + + // Change this to a register and set the reg#. + assert(!(isDead && !isDef) && "Dead flag on non-def"); + assert(!(isKill && isDef) && "Kill flag on def"); + OpKind = MO_Register; + SmallContents.RegNo = Reg; + SubReg_TargetFlags = 0; + IsDef = isDef; + IsImp = isImp; + IsDeadOrKill = isKill | isDead; + IsRenamable = false; + IsUndef = isUndef; + IsInternalRead = false; + IsEarlyClobber = false; + IsDebug = isDebug; + // Ensure isOnRegUseList() returns false. 
+ Contents.Reg.Prev = nullptr; + // Preserve the tie when the operand was already a register. + if (!WasReg) + TiedTo = 0; + + // If this operand is embedded in a function, add the operand to the + // register's use/def list. + if (RegInfo) + RegInfo->addRegOperandToUseList(this); +} + +/// isIdenticalTo - Return true if this operand is identical to the specified +/// operand. Note that this should stay in sync with the hash_value overload +/// below. +bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { + if (getType() != Other.getType() || + getTargetFlags() != Other.getTargetFlags()) + return false; + + switch (getType()) { + case MachineOperand::MO_Register: + return getReg() == Other.getReg() && isDef() == Other.isDef() && + getSubReg() == Other.getSubReg(); + case MachineOperand::MO_Immediate: + return getImm() == Other.getImm(); + case MachineOperand::MO_CImmediate: + return getCImm() == Other.getCImm(); + case MachineOperand::MO_FPImmediate: + return getFPImm() == Other.getFPImm(); + case MachineOperand::MO_MachineBasicBlock: + return getMBB() == Other.getMBB(); + case MachineOperand::MO_FrameIndex: + return getIndex() == Other.getIndex(); + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_TargetIndex: + return getIndex() == Other.getIndex() && getOffset() == Other.getOffset(); + case MachineOperand::MO_JumpTableIndex: + return getIndex() == Other.getIndex(); + case MachineOperand::MO_GlobalAddress: + return getGlobal() == Other.getGlobal() && getOffset() == Other.getOffset(); + case MachineOperand::MO_ExternalSymbol: + return strcmp(getSymbolName(), Other.getSymbolName()) == 0 && + getOffset() == Other.getOffset(); + case MachineOperand::MO_BlockAddress: + return getBlockAddress() == Other.getBlockAddress() && + getOffset() == Other.getOffset(); + case MachineOperand::MO_RegisterMask: + case MachineOperand::MO_RegisterLiveOut: { + // Shallow compare of the two RegMasks + const uint32_t *RegMask = getRegMask(); + const 
uint32_t *OtherRegMask = Other.getRegMask(); + if (RegMask == OtherRegMask) + return true; + + if (const MachineFunction *MF = getMFIfAvailable(*this)) { + // Calculate the size of the RegMask + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32; + + // Deep compare of the two RegMasks + return std::equal(RegMask, RegMask + RegMaskSize, OtherRegMask); + } + // We don't know the size of the RegMask, so we can't deep compare the two + // reg masks. + return false; + } + case MachineOperand::MO_MCSymbol: + return getMCSymbol() == Other.getMCSymbol(); + case MachineOperand::MO_CFIIndex: + return getCFIIndex() == Other.getCFIIndex(); + case MachineOperand::MO_Metadata: + return getMetadata() == Other.getMetadata(); + case MachineOperand::MO_IntrinsicID: + return getIntrinsicID() == Other.getIntrinsicID(); + case MachineOperand::MO_Predicate: + return getPredicate() == Other.getPredicate(); + } + llvm_unreachable("Invalid machine operand type"); +} + +// Note: this must stay exactly in sync with isIdenticalTo above. +hash_code llvm::hash_value(const MachineOperand &MO) { + switch (MO.getType()) { + case MachineOperand::MO_Register: + // Register operands don't have target flags. 
+ return hash_combine(MO.getType(), MO.getReg(), MO.getSubReg(), MO.isDef()); + case MachineOperand::MO_Immediate: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getImm()); + case MachineOperand::MO_CImmediate: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCImm()); + case MachineOperand::MO_FPImmediate: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getFPImm()); + case MachineOperand::MO_MachineBasicBlock: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMBB()); + case MachineOperand::MO_FrameIndex: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex()); + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_TargetIndex: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex(), + MO.getOffset()); + case MachineOperand::MO_JumpTableIndex: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex()); + case MachineOperand::MO_ExternalSymbol: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getOffset(), + MO.getSymbolName()); + case MachineOperand::MO_GlobalAddress: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getGlobal(), + MO.getOffset()); + case MachineOperand::MO_BlockAddress: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getBlockAddress(), + MO.getOffset()); + case MachineOperand::MO_RegisterMask: + case MachineOperand::MO_RegisterLiveOut: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask()); + case MachineOperand::MO_Metadata: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMetadata()); + case MachineOperand::MO_MCSymbol: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMCSymbol()); + case MachineOperand::MO_CFIIndex: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCFIIndex()); + case MachineOperand::MO_IntrinsicID: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIntrinsicID()); + case MachineOperand::MO_Predicate: + return 
hash_combine(MO.getType(), MO.getTargetFlags(), MO.getPredicate()); + } + llvm_unreachable("Invalid machine operand type"); +} + +// Try to crawl up to the machine function and get TRI and IntrinsicInfo from +// it. +static void tryToGetTargetInfo(const MachineOperand &MO, + const TargetRegisterInfo *&TRI, + const TargetIntrinsicInfo *&IntrinsicInfo) { + if (const MachineFunction *MF = getMFIfAvailable(MO)) { + TRI = MF->getSubtarget().getRegisterInfo(); + IntrinsicInfo = MF->getTarget().getIntrinsicInfo(); + } +} + +static void printOffset(raw_ostream &OS, int64_t Offset) { + if (Offset == 0) + return; + if (Offset < 0) { + OS << " - " << -Offset; + return; + } + OS << " + " << Offset; +} + +static const char *getTargetIndexName(const MachineFunction &MF, int Index) { + const auto *TII = MF.getSubtarget().getInstrInfo(); + assert(TII && "expected instruction info"); + auto Indices = TII->getSerializableTargetIndices(); + auto Found = find_if(Indices, [&](const std::pair<int, const char *> &I) { + return I.first == Index; + }); + if (Found != Indices.end()) + return Found->second; + return nullptr; +} + +static const char *getTargetFlagName(const TargetInstrInfo *TII, unsigned TF) { + auto Flags = TII->getSerializableDirectMachineOperandTargetFlags(); + for (const auto &I : Flags) { + if (I.first == TF) { + return I.second; + } + } + return nullptr; +} + +void MachineOperand::printSubregIdx(raw_ostream &OS, uint64_t Index, + const TargetRegisterInfo *TRI) { + OS << "%subreg."; + if (TRI) + OS << TRI->getSubRegIndexName(Index); + else + OS << Index; +} + +void MachineOperand::printTargetFlags(raw_ostream &OS, + const MachineOperand &Op) { + if (!Op.getTargetFlags()) + return; + const MachineFunction *MF = getMFIfAvailable(Op); + if (!MF) + return; + + const auto *TII = MF->getSubtarget().getInstrInfo(); + assert(TII && "expected instruction info"); + auto Flags = TII->decomposeMachineOperandsTargetFlags(Op.getTargetFlags()); + OS << "target-flags("; + const bool 
HasDirectFlags = Flags.first; + const bool HasBitmaskFlags = Flags.second; + if (!HasDirectFlags && !HasBitmaskFlags) { + OS << "<unknown>) "; + return; + } + if (HasDirectFlags) { + if (const auto *Name = getTargetFlagName(TII, Flags.first)) + OS << Name; + else + OS << "<unknown target flag>"; + } + if (!HasBitmaskFlags) { + OS << ") "; + return; + } + bool IsCommaNeeded = HasDirectFlags; + unsigned BitMask = Flags.second; + auto BitMasks = TII->getSerializableBitmaskMachineOperandTargetFlags(); + for (const auto &Mask : BitMasks) { + // Check if the flag's bitmask has the bits of the current mask set. + if ((BitMask & Mask.first) == Mask.first) { + if (IsCommaNeeded) + OS << ", "; + IsCommaNeeded = true; + OS << Mask.second; + // Clear the bits which were serialized from the flag's bitmask. + BitMask &= ~(Mask.first); + } + } + if (BitMask) { + // When the resulting flag's bitmask isn't zero, we know that we didn't + // serialize all of the bit flags. + if (IsCommaNeeded) + OS << ", "; + OS << "<unknown bitmask target flag>"; + } + OS << ") "; +} + +void MachineOperand::printSymbol(raw_ostream &OS, MCSymbol &Sym) { + OS << "<mcsymbol " << Sym << ">"; +} + +void MachineOperand::printStackObjectReference(raw_ostream &OS, + unsigned FrameIndex, + bool IsFixed, StringRef Name) { + if (IsFixed) { + OS << "%fixed-stack." << FrameIndex; + return; + } + + OS << "%stack." << FrameIndex; + if (!Name.empty()) + OS << '.' 
<< Name; +} + +void MachineOperand::print(raw_ostream &OS, const TargetRegisterInfo *TRI, + const TargetIntrinsicInfo *IntrinsicInfo) const { + tryToGetTargetInfo(*this, TRI, IntrinsicInfo); + ModuleSlotTracker DummyMST(nullptr); + print(OS, DummyMST, LLT{}, /*PrintDef=*/false, + /*ShouldPrintRegisterTies=*/true, + /*TiedOperandIdx=*/0, TRI, IntrinsicInfo); +} + +void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, + LLT TypeToPrint, bool PrintDef, + bool ShouldPrintRegisterTies, + unsigned TiedOperandIdx, + const TargetRegisterInfo *TRI, + const TargetIntrinsicInfo *IntrinsicInfo) const { + printTargetFlags(OS, *this); + switch (getType()) { + case MachineOperand::MO_Register: { + unsigned Reg = getReg(); + if (isImplicit()) + OS << (isDef() ? "implicit-def " : "implicit "); + else if (PrintDef && isDef()) + // Print the 'def' flag only when the operand is defined after '='. + OS << "def "; + if (isInternalRead()) + OS << "internal "; + if (isDead()) + OS << "dead "; + if (isKill()) + OS << "killed "; + if (isUndef()) + OS << "undef "; + if (isEarlyClobber()) + OS << "early-clobber "; + if (isDebug()) + OS << "debug-use "; + if (TargetRegisterInfo::isPhysicalRegister(getReg()) && isRenamable()) + OS << "renamable "; + OS << printReg(Reg, TRI); + // Print the sub register. + if (unsigned SubReg = getSubReg()) { + if (TRI) + OS << '.' << TRI->getSubRegIndexName(SubReg); + else + OS << ".subreg" << SubReg; + } + // Print the register class / bank. + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (const MachineFunction *MF = getMFIfAvailable(*this)) { + const MachineRegisterInfo &MRI = MF->getRegInfo(); + if (!PrintDef || MRI.def_empty(Reg)) { + OS << ':'; + OS << printRegClassOrBank(Reg, MRI, TRI); + } + } + } + // Print ties. + if (ShouldPrintRegisterTies && isTied() && !isDef()) + OS << "(tied-def " << TiedOperandIdx << ")"; + // Print types. 
+ if (TypeToPrint.isValid()) + OS << '(' << TypeToPrint << ')'; + break; + } + case MachineOperand::MO_Immediate: + OS << getImm(); + break; + case MachineOperand::MO_CImmediate: + getCImm()->printAsOperand(OS, /*PrintType=*/true, MST); + break; + case MachineOperand::MO_FPImmediate: + if (getFPImm()->getType()->isFloatTy()) { + OS << getFPImm()->getValueAPF().convertToFloat(); + } else if (getFPImm()->getType()->isHalfTy()) { + APFloat APF = getFPImm()->getValueAPF(); + bool Unused; + APF.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &Unused); + OS << "half " << APF.convertToFloat(); + } else if (getFPImm()->getType()->isFP128Ty()) { + APFloat APF = getFPImm()->getValueAPF(); + SmallString<16> Str; + getFPImm()->getValueAPF().toString(Str); + OS << "quad " << Str; + } else if (getFPImm()->getType()->isX86_FP80Ty()) { + APFloat APF = getFPImm()->getValueAPF(); + OS << "x86_fp80 0xK"; + APInt API = APF.bitcastToAPInt(); + OS << format_hex_no_prefix(API.getHiBits(16).getZExtValue(), 4, + /*Upper=*/true); + OS << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16, + /*Upper=*/true); + } else { + OS << getFPImm()->getValueAPF().convertToDouble(); + } + break; + case MachineOperand::MO_MachineBasicBlock: + OS << printMBBReference(*getMBB()); + break; + case MachineOperand::MO_FrameIndex: { + int FrameIndex = getIndex(); + bool IsFixed = false; + StringRef Name; + if (const MachineFunction *MF = getMFIfAvailable(*this)) { + const MachineFrameInfo &MFI = MF->getFrameInfo(); + IsFixed = MFI.isFixedObjectIndex(FrameIndex); + if (const AllocaInst *Alloca = MFI.getObjectAllocation(FrameIndex)) + if (Alloca->hasName()) + Name = Alloca->getName(); + if (IsFixed) + FrameIndex -= MFI.getObjectIndexBegin(); + } + printStackObjectReference(OS, FrameIndex, IsFixed, Name); + break; + } + case MachineOperand::MO_ConstantPoolIndex: + OS << "%const." 
<< getIndex(); + printOffset(OS, getOffset()); + break; + case MachineOperand::MO_TargetIndex: { + OS << "target-index("; + const char *Name = "<unknown>"; + if (const MachineFunction *MF = getMFIfAvailable(*this)) + if (const auto *TargetIndexName = getTargetIndexName(*MF, getIndex())) + Name = TargetIndexName; + OS << Name << ')'; + printOffset(OS, getOffset()); + break; + } + case MachineOperand::MO_JumpTableIndex: + OS << printJumpTableEntryReference(getIndex()); + break; + case MachineOperand::MO_GlobalAddress: + getGlobal()->printAsOperand(OS, /*PrintType=*/false, MST); + printOffset(OS, getOffset()); + break; + case MachineOperand::MO_ExternalSymbol: { + StringRef Name = getSymbolName(); + OS << '$'; + if (Name.empty()) { + OS << "\"\""; + } else { + printLLVMNameWithoutPrefix(OS, Name); + } + printOffset(OS, getOffset()); + break; + } + case MachineOperand::MO_BlockAddress: + OS << '<'; + getBlockAddress()->printAsOperand(OS, /*PrintType=*/false, MST); + if (getOffset()) + OS << "+" << getOffset(); + OS << '>'; + break; + case MachineOperand::MO_RegisterMask: { + OS << "<regmask"; + if (TRI) { + unsigned NumRegsInMask = 0; + unsigned NumRegsEmitted = 0; + for (unsigned i = 0; i < TRI->getNumRegs(); ++i) { + unsigned MaskWord = i / 32; + unsigned MaskBit = i % 32; + if (getRegMask()[MaskWord] & (1 << MaskBit)) { + if (PrintRegMaskNumRegs < 0 || + NumRegsEmitted <= static_cast<unsigned>(PrintRegMaskNumRegs)) { + OS << " " << printReg(i, TRI); + NumRegsEmitted++; + } + NumRegsInMask++; + } + } + if (NumRegsEmitted != NumRegsInMask) + OS << " and " << (NumRegsInMask - NumRegsEmitted) << " more..."; + } else { + OS << " ..."; + } + OS << ">"; + break; + } + case MachineOperand::MO_RegisterLiveOut: { + const uint32_t *RegMask = getRegLiveOut(); + OS << "liveout("; + if (!TRI) { + OS << "<unknown>"; + } else { + bool IsCommaNeeded = false; + for (unsigned Reg = 0, E = TRI->getNumRegs(); Reg < E; ++Reg) { + if (RegMask[Reg / 32] & (1U << (Reg % 32))) { + if 
(IsCommaNeeded) + OS << ", "; + OS << printReg(Reg, TRI); + IsCommaNeeded = true; + } + } + } + OS << ")"; + break; + } + case MachineOperand::MO_Metadata: + getMetadata()->printAsOperand(OS, MST); + break; + case MachineOperand::MO_MCSymbol: + printSymbol(OS, *getMCSymbol()); + break; + case MachineOperand::MO_CFIIndex: + OS << "<call frame instruction>"; + break; + case MachineOperand::MO_IntrinsicID: { + Intrinsic::ID ID = getIntrinsicID(); + if (ID < Intrinsic::num_intrinsics) + OS << "<intrinsic:@" << Intrinsic::getName(ID, None) << '>'; + else if (IntrinsicInfo) + OS << "<intrinsic:@" << IntrinsicInfo->getName(ID) << '>'; + else + OS << "<intrinsic:" << ID << '>'; + break; + } + case MachineOperand::MO_Predicate: { + auto Pred = static_cast<CmpInst::Predicate>(getPredicate()); + OS << '<' << (CmpInst::isIntPredicate(Pred) ? "intpred" : "floatpred") + << CmpInst::getPredicateName(Pred) << '>'; + break; + } + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void MachineOperand::dump() const { dbgs() << *this << '\n'; } +#endif + +//===----------------------------------------------------------------------===// +// MachineMemOperand Implementation +//===----------------------------------------------------------------------===// + +/// getAddrSpace - Return the LLVM IR address space number that this pointer +/// points into. +unsigned MachinePointerInfo::getAddrSpace() const { return AddrSpace; } + +/// isDereferenceable - Return true if V is always dereferenceable for +/// Offset + Size byte. 
+bool MachinePointerInfo::isDereferenceable(unsigned Size, LLVMContext &C, + const DataLayout &DL) const { + if (!V.is<const Value *>()) + return false; + + const Value *BasePtr = V.get<const Value *>(); + if (BasePtr == nullptr) + return false; + + return isDereferenceableAndAlignedPointer( + BasePtr, 1, APInt(DL.getPointerSizeInBits(), Offset + Size), DL); +} + +/// getConstantPool - Return a MachinePointerInfo record that refers to the +/// constant pool. +MachinePointerInfo MachinePointerInfo::getConstantPool(MachineFunction &MF) { + return MachinePointerInfo(MF.getPSVManager().getConstantPool()); +} + +/// getFixedStack - Return a MachinePointerInfo record that refers to the +/// the specified FrameIndex. +MachinePointerInfo MachinePointerInfo::getFixedStack(MachineFunction &MF, + int FI, int64_t Offset) { + return MachinePointerInfo(MF.getPSVManager().getFixedStack(FI), Offset); +} + +MachinePointerInfo MachinePointerInfo::getJumpTable(MachineFunction &MF) { + return MachinePointerInfo(MF.getPSVManager().getJumpTable()); +} + +MachinePointerInfo MachinePointerInfo::getGOT(MachineFunction &MF) { + return MachinePointerInfo(MF.getPSVManager().getGOT()); +} + +MachinePointerInfo MachinePointerInfo::getStack(MachineFunction &MF, + int64_t Offset, uint8_t ID) { + return MachinePointerInfo(MF.getPSVManager().getStack(), Offset, ID); +} + +MachinePointerInfo MachinePointerInfo::getUnknownStack(MachineFunction &MF) { + return MachinePointerInfo(MF.getDataLayout().getAllocaAddrSpace()); +} + +MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f, + uint64_t s, unsigned int a, + const AAMDNodes &AAInfo, + const MDNode *Ranges, SyncScope::ID SSID, + AtomicOrdering Ordering, + AtomicOrdering FailureOrdering) + : PtrInfo(ptrinfo), Size(s), FlagVals(f), BaseAlignLog2(Log2_32(a) + 1), + AAInfo(AAInfo), Ranges(Ranges) { + assert((PtrInfo.V.isNull() || PtrInfo.V.is<const PseudoSourceValue *>() || + isa<PointerType>(PtrInfo.V.get<const Value 
*>()->getType())) && + "invalid pointer value"); + assert(getBaseAlignment() == a && "Alignment is not a power of 2!"); + assert((isLoad() || isStore()) && "Not a load/store!"); + + AtomicInfo.SSID = static_cast<unsigned>(SSID); + assert(getSyncScopeID() == SSID && "Value truncated"); + AtomicInfo.Ordering = static_cast<unsigned>(Ordering); + assert(getOrdering() == Ordering && "Value truncated"); + AtomicInfo.FailureOrdering = static_cast<unsigned>(FailureOrdering); + assert(getFailureOrdering() == FailureOrdering && "Value truncated"); +} + +/// Profile - Gather unique data for the object. +/// +void MachineMemOperand::Profile(FoldingSetNodeID &ID) const { + ID.AddInteger(getOffset()); + ID.AddInteger(Size); + ID.AddPointer(getOpaqueValue()); + ID.AddInteger(getFlags()); + ID.AddInteger(getBaseAlignment()); +} + +void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) { + // The Value and Offset may differ due to CSE. But the flags and size + // should be the same. + assert(MMO->getFlags() == getFlags() && "Flags mismatch!"); + assert(MMO->getSize() == getSize() && "Size mismatch!"); + + if (MMO->getBaseAlignment() >= getBaseAlignment()) { + // Update the alignment value. + BaseAlignLog2 = Log2_32(MMO->getBaseAlignment()) + 1; + // Also update the base and offset, because the new alignment may + // not be applicable with the old ones. + PtrInfo = MMO->PtrInfo; + } +} + +/// getAlignment - Return the minimum known alignment in bytes of the +/// actual memory reference. 
+uint64_t MachineMemOperand::getAlignment() const { + return MinAlign(getBaseAlignment(), getOffset()); +} + +void MachineMemOperand::print(raw_ostream &OS) const { + ModuleSlotTracker DummyMST(nullptr); + print(OS, DummyMST); +} +void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST) const { + assert((isLoad() || isStore()) && "SV has to be a load, store or both."); + + if (isVolatile()) + OS << "Volatile "; + + if (isLoad()) + OS << "LD"; + if (isStore()) + OS << "ST"; + OS << getSize(); + + // Print the address information. + OS << "["; + if (const Value *V = getValue()) + V->printAsOperand(OS, /*PrintType=*/false, MST); + else if (const PseudoSourceValue *PSV = getPseudoValue()) + PSV->printCustom(OS); + else + OS << "<unknown>"; + + unsigned AS = getAddrSpace(); + if (AS != 0) + OS << "(addrspace=" << AS << ')'; + + // If the alignment of the memory reference itself differs from the alignment + // of the base pointer, print the base alignment explicitly, next to the base + // pointer. + if (getBaseAlignment() != getAlignment()) + OS << "(align=" << getBaseAlignment() << ")"; + + if (getOffset() != 0) + OS << "+" << getOffset(); + OS << "]"; + + // Print the alignment of the reference. + if (getBaseAlignment() != getAlignment() || getBaseAlignment() != getSize()) + OS << "(align=" << getAlignment() << ")"; + + // Print TBAA info. + if (const MDNode *TBAAInfo = getAAInfo().TBAA) { + OS << "(tbaa="; + if (TBAAInfo->getNumOperands() > 0) + TBAAInfo->getOperand(0)->printAsOperand(OS, MST); + else + OS << "<unknown>"; + OS << ")"; + } + + // Print AA scope info. + if (const MDNode *ScopeInfo = getAAInfo().Scope) { + OS << "(alias.scope="; + if (ScopeInfo->getNumOperands() > 0) + for (unsigned i = 0, ie = ScopeInfo->getNumOperands(); i != ie; ++i) { + ScopeInfo->getOperand(i)->printAsOperand(OS, MST); + if (i != ie - 1) + OS << ","; + } + else + OS << "<unknown>"; + OS << ")"; + } + + // Print AA noalias scope info. 
+ if (const MDNode *NoAliasInfo = getAAInfo().NoAlias) { + OS << "(noalias="; + if (NoAliasInfo->getNumOperands() > 0) + for (unsigned i = 0, ie = NoAliasInfo->getNumOperands(); i != ie; ++i) { + NoAliasInfo->getOperand(i)->printAsOperand(OS, MST); + if (i != ie - 1) + OS << ","; + } + else + OS << "<unknown>"; + OS << ")"; + } + + if (const MDNode *Ranges = getRanges()) { + unsigned NumRanges = Ranges->getNumOperands(); + if (NumRanges != 0) { + OS << "(ranges="; + + for (unsigned I = 0; I != NumRanges; ++I) { + Ranges->getOperand(I)->printAsOperand(OS, MST); + if (I != NumRanges - 1) + OS << ','; + } + + OS << ')'; + } + } + + if (isNonTemporal()) + OS << "(nontemporal)"; + if (isDereferenceable()) + OS << "(dereferenceable)"; + if (isInvariant()) + OS << "(invariant)"; + if (getFlags() & MOTargetFlag1) + OS << "(flag1)"; + if (getFlags() & MOTargetFlag2) + OS << "(flag2)"; + if (getFlags() & MOTargetFlag3) + OS << "(flag3)"; +} diff --git a/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp index 73c3428a6e53..ca4452218da1 100644 --- a/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp +++ b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp @@ -16,7 +16,6 @@ #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/LLVMContext.h" @@ -51,24 +50,15 @@ void MachineOptimizationRemarkEmitter::emit( auto &OptDiag = cast<DiagnosticInfoMIROptimization>(OptDiagCommon); computeHotness(OptDiag); - LLVMContext &Ctx = MF.getFunction()->getContext(); + LLVMContext &Ctx = MF.getFunction().getContext(); - // If a diagnostic has a hotness value, then only emit it if its hotness - // meets the threshold. - if (OptDiag.getHotness() && - *OptDiag.getHotness() < Ctx.getDiagnosticsHotnessThreshold()) { + // Only emit it if its hotness meets the threshold. 
+ if (OptDiag.getHotness().getValueOr(0) < + Ctx.getDiagnosticsHotnessThreshold()) { return; } - yaml::Output *Out = Ctx.getDiagnosticsOutputFile(); - if (Out) { - auto *P = &const_cast<DiagnosticInfoOptimizationBase &>(OptDiagCommon); - *Out << P; - } - // FIXME: now that IsVerbose is part of DI, filtering for this will be moved - // from here to clang. - if (!OptDiag.isVerbose() || shouldEmitVerbose()) - Ctx.diagnose(OptDiag); + Ctx.diagnose(OptDiag); } MachineOptimizationRemarkEmitterPass::MachineOptimizationRemarkEmitterPass() @@ -81,7 +71,7 @@ bool MachineOptimizationRemarkEmitterPass::runOnMachineFunction( MachineFunction &MF) { MachineBlockFrequencyInfo *MBFI; - if (MF.getFunction()->getContext().getDiagnosticsHotnessRequested()) + if (MF.getFunction().getContext().getDiagnosticsHotnessRequested()) MBFI = &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI(); else MBFI = nullptr; diff --git a/lib/CodeGen/MachineOutliner.cpp b/lib/CodeGen/MachineOutliner.cpp index fd6b2427891d..e4eb8802ac66 100644 --- a/lib/CodeGen/MachineOutliner.cpp +++ b/lib/CodeGen/MachineOutliner.cpp @@ -15,6 +15,23 @@ /// instructions. If a sequence of instructions appears often, then it ought /// to be beneficial to pull out into a function. /// +/// The MachineOutliner communicates with a given target using hooks defined in +/// TargetInstrInfo.h. The target supplies the outliner with information on how +/// a specific sequence of instructions should be outlined. This information +/// is used to deduce the number of instructions necessary to +/// +/// * Create an outlined function +/// * Call that outlined function +/// +/// Targets must implement +/// * getOutliningCandidateInfo +/// * insertOutlinerEpilogue +/// * insertOutlinedCall +/// * insertOutlinerPrologue +/// * isFunctionSafeToOutlineFrom +/// +/// in order to make use of the MachineOutliner. +/// /// This was originally presented at the 2016 LLVM Developers' Meeting in the /// talk "Reducing Code Size Using Outlining". 
For a high-level overview of /// how this pass works, the talk is available on YouTube at @@ -42,19 +59,17 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/Twine.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <functional> #include <map> #include <sstream> @@ -64,6 +79,7 @@ #define DEBUG_TYPE "machine-outliner" using namespace llvm; +using namespace ore; STATISTIC(NumOutlined, "Number of candidates outlined"); STATISTIC(FunctionsCreated, "Number of functions created"); @@ -73,19 +89,32 @@ namespace { /// \brief An individual sequence of instructions to be replaced with a call to /// an outlined function. struct Candidate { +private: + /// The start index of this \p Candidate in the instruction list. + unsigned StartIdx; + /// The number of instructions in this \p Candidate. + unsigned Len; + +public: /// Set to false if the candidate overlapped with another candidate. bool InCandidateList = true; - /// The start index of this \p Candidate. - size_t StartIdx; + /// \brief The index of this \p Candidate's \p OutlinedFunction in the list of + /// \p OutlinedFunctions. + unsigned FunctionIdx; - /// The number of instructions in this \p Candidate. - size_t Len; + /// Contains all target-specific information for this \p Candidate. 
+ TargetInstrInfo::MachineOutlinerInfo MInfo; - /// The index of this \p Candidate's \p OutlinedFunction in the list of - /// \p OutlinedFunctions. - size_t FunctionIdx; + /// Return the number of instructions in this Candidate. + unsigned getLength() const { return Len; } + + /// Return the start index of this candidate. + unsigned getStartIdx() const { return StartIdx; } + + // Return the end index of this candidate. + unsigned getEndIdx() const { return StartIdx + Len - 1; } /// \brief The number of instructions that would be saved by outlining every /// candidate of this type. @@ -96,51 +125,79 @@ struct Candidate { /// for some given candidate. unsigned Benefit = 0; - Candidate(size_t StartIdx, size_t Len, size_t FunctionIdx) + Candidate(unsigned StartIdx, unsigned Len, unsigned FunctionIdx) : StartIdx(StartIdx), Len(Len), FunctionIdx(FunctionIdx) {} Candidate() {} /// \brief Used to ensure that \p Candidates are outlined in an order that /// preserves the start and end indices of other \p Candidates. - bool operator<(const Candidate &RHS) const { return StartIdx > RHS.StartIdx; } + bool operator<(const Candidate &RHS) const { + return getStartIdx() > RHS.getStartIdx(); + } }; /// \brief The information necessary to create an outlined function for some /// class of candidate. struct OutlinedFunction { +private: + /// The number of candidates for this \p OutlinedFunction. + unsigned OccurrenceCount = 0; + +public: + std::vector<std::shared_ptr<Candidate>> Candidates; + /// The actual outlined function created. /// This is initialized after we go through and create the actual function. MachineFunction *MF = nullptr; /// A number assigned to this function which appears at the end of its name. - size_t Name; - - /// The number of candidates for this OutlinedFunction. - size_t OccurrenceCount = 0; + unsigned Name; /// \brief The sequence of integers corresponding to the instructions in this /// function. 
std::vector<unsigned> Sequence; - /// The number of instructions this function would save. - unsigned Benefit = 0; + /// Contains all target-specific information for this \p OutlinedFunction. + TargetInstrInfo::MachineOutlinerInfo MInfo; + + /// Return the number of candidates for this \p OutlinedFunction. + unsigned getOccurrenceCount() { return OccurrenceCount; } + + /// Decrement the occurrence count of this OutlinedFunction and return the + /// new count. + unsigned decrement() { + assert(OccurrenceCount > 0 && "Can't decrement an empty function!"); + OccurrenceCount--; + return getOccurrenceCount(); + } + + /// \brief Return the number of instructions it would take to outline this + /// function. + unsigned getOutliningCost() { + return (OccurrenceCount * MInfo.CallOverhead) + Sequence.size() + + MInfo.FrameOverhead; + } - /// \brief Set to true if candidates for this outlined function should be - /// replaced with tail calls to this OutlinedFunction. - bool IsTailCall = false; + /// \brief Return the number of instructions that would be saved by outlining + /// this function. + unsigned getBenefit() { + unsigned NotOutlinedCost = OccurrenceCount * Sequence.size(); + unsigned OutlinedCost = getOutliningCost(); + return (NotOutlinedCost < OutlinedCost) ? 0 + : NotOutlinedCost - OutlinedCost; + } - OutlinedFunction(size_t Name, size_t OccurrenceCount, + OutlinedFunction(unsigned Name, unsigned OccurrenceCount, const std::vector<unsigned> &Sequence, - unsigned Benefit, bool IsTailCall) - : Name(Name), OccurrenceCount(OccurrenceCount), Sequence(Sequence), - Benefit(Benefit), IsTailCall(IsTailCall) - {} + TargetInstrInfo::MachineOutlinerInfo &MInfo) + : OccurrenceCount(OccurrenceCount), Name(Name), Sequence(Sequence), + MInfo(MInfo) {} }; /// Represents an undefined index in the suffix tree. -const size_t EmptyIdx = -1; +const unsigned EmptyIdx = -1; /// A node in a suffix tree which represents a substring or suffix. 
/// @@ -170,7 +227,7 @@ struct SuffixTreeNode { bool IsInTree = true; /// The start index of this node's substring in the main string. - size_t StartIdx = EmptyIdx; + unsigned StartIdx = EmptyIdx; /// The end index of this node's substring in the main string. /// @@ -178,24 +235,23 @@ struct SuffixTreeNode { /// step in the construction algorithm. To avoid having to update O(N) /// nodes individually at the end of every step, the end index is stored /// as a pointer. - size_t *EndIdx = nullptr; + unsigned *EndIdx = nullptr; /// For leaves, the start index of the suffix represented by this node. /// /// For all other nodes, this is ignored. - size_t SuffixIdx = EmptyIdx; + unsigned SuffixIdx = EmptyIdx; /// \brief For internal nodes, a pointer to the internal node representing /// the same sequence with the first character chopped off. /// - /// This has two major purposes in the suffix tree. The first is as a - /// shortcut in Ukkonen's construction algorithm. One of the things that + /// This acts as a shortcut in Ukkonen's algorithm. One of the things that /// Ukkonen's algorithm does to achieve linear-time construction is /// keep track of which node the next insert should be at. This makes each /// insert O(1), and there are a total of O(N) inserts. The suffix link /// helps with inserting children of internal nodes. /// - /// Say we add a child to an internal node with associated mapping S. The + /// Say we add a child to an internal node with associated mapping S. The /// next insertion must be at the node representing S - its first character. /// This is given by the way that we iteratively build the tree in Ukkonen's /// algorithm. The main idea is to look at the suffixes of each prefix in the @@ -204,27 +260,6 @@ struct SuffixTreeNode { /// move to the next insertion point in O(1) time. If we don't, then we'd /// have to query from the root, which takes O(N) time. This would make the /// construction algorithm O(N^2) rather than O(N). 
- /// - /// The suffix link is also used during the tree pruning process to let us - /// quickly throw out a bunch of potential overlaps. Say we have a sequence - /// S we want to outline. Then each of its suffixes contribute to at least - /// one overlapping case. Therefore, we can follow the suffix links - /// starting at the node associated with S to the root and "delete" those - /// nodes, save for the root. For each candidate, this removes - /// O(|candidate|) overlaps from the search space. We don't actually - /// completely invalidate these nodes though; doing that is far too - /// aggressive. Consider the following pathological string: - /// - /// 1 2 3 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 - /// - /// If we, for the sake of example, outlined 1 2 3, then we would throw - /// out all instances of 2 3. This isn't desirable. To get around this, - /// when we visit a link node, we decrement its occurrence count by the - /// number of sequences we outlined in the current step. In the pathological - /// example, the 2 3 node would have an occurrence count of 8, while the - /// 1 2 3 node would have an occurrence count of 2. Thus, the 2 3 node - /// would survive to the next round allowing us to outline the extra - /// instances of 2 3. SuffixTreeNode *Link = nullptr; /// The parent of this node. Every node except for the root has a parent. @@ -234,11 +269,11 @@ struct SuffixTreeNode { /// /// This is equal to the number of leaf children of the string. It represents /// the number of suffixes that the node's string is a prefix of. - size_t OccurrenceCount = 0; + unsigned OccurrenceCount = 0; /// The length of the string formed by concatenating the edge labels from the /// root to this node. - size_t ConcatLen = 0; + unsigned ConcatLen = 0; /// Returns true if this node is a leaf. 
bool isLeaf() const { return SuffixIdx != EmptyIdx; } @@ -260,7 +295,7 @@ struct SuffixTreeNode { return *EndIdx - StartIdx + 1; } - SuffixTreeNode(size_t StartIdx, size_t *EndIdx, SuffixTreeNode *Link, + SuffixTreeNode(unsigned StartIdx, unsigned *EndIdx, SuffixTreeNode *Link, SuffixTreeNode *Parent) : StartIdx(StartIdx), EndIdx(EndIdx), Link(Link), Parent(Parent) {} @@ -290,10 +325,16 @@ struct SuffixTreeNode { /// /// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf class SuffixTree { -private: +public: + /// Stores each leaf node in the tree. + /// + /// This is used for finding outlining candidates. + std::vector<SuffixTreeNode *> LeafVector; + /// Each element is an integer representing an instruction in the module. ArrayRef<unsigned> Str; +private: /// Maintains each node in the tree. SpecificBumpPtrAllocator<SuffixTreeNode> NodeAllocator; @@ -303,11 +344,6 @@ private: /// \p NodeAllocator like every other node in the tree. SuffixTreeNode *Root = nullptr; - /// Stores each leaf node in the tree. - /// - /// This is used for finding outlining candidates. - std::vector<SuffixTreeNode *> LeafVector; - /// Maintains the end indices of the internal nodes in the tree. /// /// Each internal node is guaranteed to never have its end index change @@ -318,7 +354,7 @@ private: BumpPtrAllocator InternalEndIdxAllocator; /// The end index of each leaf in the tree. - size_t LeafEndIdx = -1; + unsigned LeafEndIdx = -1; /// \brief Helper struct which keeps track of the next insertion point in /// Ukkonen's algorithm. @@ -327,10 +363,10 @@ private: SuffixTreeNode *Node; /// The index of the first character in the substring currently being added. - size_t Idx = EmptyIdx; + unsigned Idx = EmptyIdx; /// The length of the substring we have to add at the current step. - size_t Len = 0; + unsigned Len = 0; }; /// \brief The point the next insertion will take place at in the @@ -344,15 +380,13 @@ private: /// \param Edge The label on the edge leaving \p Parent to this node. 
/// /// \returns A pointer to the allocated leaf node. - SuffixTreeNode *insertLeaf(SuffixTreeNode &Parent, size_t StartIdx, + SuffixTreeNode *insertLeaf(SuffixTreeNode &Parent, unsigned StartIdx, unsigned Edge) { assert(StartIdx <= LeafEndIdx && "String can't start after it ends!"); - SuffixTreeNode *N = new (NodeAllocator.Allocate()) SuffixTreeNode(StartIdx, - &LeafEndIdx, - nullptr, - &Parent); + SuffixTreeNode *N = new (NodeAllocator.Allocate()) + SuffixTreeNode(StartIdx, &LeafEndIdx, nullptr, &Parent); Parent.Children[Edge] = N; return N; @@ -366,18 +400,16 @@ private: /// \param Edge The label on the edge leaving \p Parent to this node. /// /// \returns A pointer to the allocated internal node. - SuffixTreeNode *insertInternalNode(SuffixTreeNode *Parent, size_t StartIdx, - size_t EndIdx, unsigned Edge) { + SuffixTreeNode *insertInternalNode(SuffixTreeNode *Parent, unsigned StartIdx, + unsigned EndIdx, unsigned Edge) { assert(StartIdx <= EndIdx && "String can't start after it ends!"); assert(!(!Parent && StartIdx != EmptyIdx) && - "Non-root internal nodes must have parents!"); + "Non-root internal nodes must have parents!"); - size_t *E = new (InternalEndIdxAllocator) size_t(EndIdx); - SuffixTreeNode *N = new (NodeAllocator.Allocate()) SuffixTreeNode(StartIdx, - E, - Root, - Parent); + unsigned *E = new (InternalEndIdxAllocator) unsigned(EndIdx); + SuffixTreeNode *N = new (NodeAllocator.Allocate()) + SuffixTreeNode(StartIdx, E, Root, Parent); if (Parent) Parent->Children[Edge] = N; @@ -390,7 +422,7 @@ private: /// /// \param[in] CurrNode The node currently being visited. /// \param CurrIdx The current index of the string being visited. 
- void setSuffixIndices(SuffixTreeNode &CurrNode, size_t CurrIdx) { + void setSuffixIndices(SuffixTreeNode &CurrNode, unsigned CurrIdx) { bool IsLeaf = CurrNode.Children.size() == 0 && !CurrNode.isRoot(); @@ -401,14 +433,13 @@ private: CurrNode.ConcatLen = CurrNode.size(); if (CurrNode.Parent) - CurrNode.ConcatLen += CurrNode.Parent->ConcatLen; + CurrNode.ConcatLen += CurrNode.Parent->ConcatLen; } // Traverse the tree depth-first. for (auto &ChildPair : CurrNode.Children) { assert(ChildPair.second && "Node had a null child!"); - setSuffixIndices(*ChildPair.second, - CurrIdx + ChildPair.second->size()); + setSuffixIndices(*ChildPair.second, CurrIdx + ChildPair.second->size()); } // Is this node a leaf? @@ -437,11 +468,11 @@ private: /// /// \returns The number of suffixes that have not been added at the end of /// this step. - unsigned extend(size_t EndIdx, size_t SuffixesToAdd) { + unsigned extend(unsigned EndIdx, unsigned SuffixesToAdd) { SuffixTreeNode *NeedsLink = nullptr; while (SuffixesToAdd > 0) { - + // Are we waiting to add anything other than just the last character? if (Active.Len == 0) { // If not, then say the active index is the end index. @@ -469,7 +500,7 @@ private: // insert a new node. SuffixTreeNode *NextNode = Active.Node->Children[FirstChar]; - size_t SubstringLen = NextNode->size(); + unsigned SubstringLen = NextNode->size(); // Is the current suffix we're trying to insert longer than the size of // the child we want to move to? @@ -515,10 +546,8 @@ private: // The node s from the diagram SuffixTreeNode *SplitNode = - insertInternalNode(Active.Node, - NextNode->StartIdx, - NextNode->StartIdx + Active.Len - 1, - FirstChar); + insertInternalNode(Active.Node, NextNode->StartIdx, + NextNode->StartIdx + Active.Len - 1, FirstChar); // Insert the new node representing the new substring into the tree as // a child of the split node. This is the node l from the diagram. 
@@ -556,87 +585,6 @@ private: } public: - - /// Find all repeated substrings that satisfy \p BenefitFn. - /// - /// If a substring appears at least twice, then it must be represented by - /// an internal node which appears in at least two suffixes. Each suffix is - /// represented by a leaf node. To do this, we visit each internal node in - /// the tree, using the leaf children of each internal node. If an internal - /// node represents a beneficial substring, then we use each of its leaf - /// children to find the locations of its substring. - /// - /// \param[out] CandidateList Filled with candidates representing each - /// beneficial substring. - /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions each - /// type of candidate. - /// \param BenefitFn The function to satisfy. - /// - /// \returns The length of the longest candidate found. - size_t findCandidates(std::vector<Candidate> &CandidateList, - std::vector<OutlinedFunction> &FunctionList, - const std::function<unsigned(SuffixTreeNode &, size_t, unsigned)> - &BenefitFn) { - - CandidateList.clear(); - FunctionList.clear(); - size_t FnIdx = 0; - size_t MaxLen = 0; - - for (SuffixTreeNode* Leaf : LeafVector) { - assert(Leaf && "Leaves in LeafVector cannot be null!"); - if (!Leaf->IsInTree) - continue; - - assert(Leaf->Parent && "All leaves must have parents!"); - SuffixTreeNode &Parent = *(Leaf->Parent); - - // If it doesn't appear enough, or we already outlined from it, skip it. - if (Parent.OccurrenceCount < 2 || Parent.isRoot() || !Parent.IsInTree) - continue; - - size_t StringLen = Leaf->ConcatLen - Leaf->size(); - - // How many instructions would outlining this string save? - unsigned Benefit = BenefitFn(Parent, - StringLen, Str[Leaf->SuffixIdx + StringLen - 1]); - - // If it's not beneficial, skip it. 
- if (Benefit < 1) - continue; - - if (StringLen > MaxLen) - MaxLen = StringLen; - - unsigned OccurrenceCount = 0; - for (auto &ChildPair : Parent.Children) { - SuffixTreeNode *M = ChildPair.second; - - // Is it a leaf? If so, we have an occurrence of this candidate. - if (M && M->IsInTree && M->isLeaf()) { - OccurrenceCount++; - CandidateList.emplace_back(M->SuffixIdx, StringLen, FnIdx); - CandidateList.back().Benefit = Benefit; - M->IsInTree = false; - } - } - - // Save the function for the new candidate sequence. - std::vector<unsigned> CandidateSequence; - for (unsigned i = Leaf->SuffixIdx; i < Leaf->SuffixIdx + StringLen; i++) - CandidateSequence.push_back(Str[i]); - - FunctionList.emplace_back(FnIdx, OccurrenceCount, CandidateSequence, - Benefit, false); - - // Move to the next function. - FnIdx++; - Parent.IsInTree = false; - } - - return MaxLen; - } - /// Construct a suffix tree from a sequence of unsigned integers. /// /// \param Str The string to construct the suffix tree for. @@ -644,17 +592,18 @@ public: Root = insertInternalNode(nullptr, EmptyIdx, EmptyIdx, 0); Root->IsInTree = true; Active.Node = Root; - LeafVector = std::vector<SuffixTreeNode*>(Str.size()); + LeafVector = std::vector<SuffixTreeNode *>(Str.size()); // Keep track of the number of suffixes we have to add of the current // prefix. - size_t SuffixesToAdd = 0; + unsigned SuffixesToAdd = 0; Active.Node = Root; // Construct the suffix tree iteratively on each prefix of the string. // PfxEndIdx is the end index of the current prefix. // End is one past the last element in the string. - for (size_t PfxEndIdx = 0, End = Str.size(); PfxEndIdx < End; PfxEndIdx++) { + for (unsigned PfxEndIdx = 0, End = Str.size(); PfxEndIdx < End; + PfxEndIdx++) { SuffixesToAdd++; LeafEndIdx = PfxEndIdx; // Extend each of the leaves. 
SuffixesToAdd = extend(PfxEndIdx, SuffixesToAdd); @@ -708,9 +657,9 @@ struct InstructionMapper { MachineInstr &MI = *It; bool WasInserted; DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>::iterator - ResultIt; + ResultIt; std::tie(ResultIt, WasInserted) = - InstructionIntegerMap.insert(std::make_pair(&MI, LegalInstrNumber)); + InstructionIntegerMap.insert(std::make_pair(&MI, LegalInstrNumber)); unsigned MINumber = ResultIt->second; // There was an insertion. @@ -725,10 +674,10 @@ struct InstructionMapper { if (LegalInstrNumber >= IllegalInstrNumber) report_fatal_error("Instruction mapping overflow!"); - assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() - && "Tried to assign DenseMap tombstone or empty key to instruction."); - assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() - && "Tried to assign DenseMap tombstone or empty key to instruction."); + assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && + "Tried to assign DenseMap tombstone or empty key to instruction."); + assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && + "Tried to assign DenseMap tombstone or empty key to instruction."); return MINumber; } @@ -748,13 +697,11 @@ struct InstructionMapper { assert(LegalInstrNumber < IllegalInstrNumber && "Instruction mapping overflow!"); - assert(IllegalInstrNumber != - DenseMapInfo<unsigned>::getEmptyKey() && - "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); + assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && + "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); - assert(IllegalInstrNumber != - DenseMapInfo<unsigned>::getTombstoneKey() && - "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); + assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && + "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); return MINumber; } @@ -777,17 +724,17 @@ struct InstructionMapper { It++) { // Keep 
track of where this instruction is in the module. - switch(TII.getOutliningType(*It)) { - case TargetInstrInfo::MachineOutlinerInstrType::Illegal: - mapToIllegalUnsigned(It); - break; + switch (TII.getOutliningType(*It)) { + case TargetInstrInfo::MachineOutlinerInstrType::Illegal: + mapToIllegalUnsigned(It); + break; - case TargetInstrInfo::MachineOutlinerInstrType::Legal: - mapToLegalUnsigned(It); - break; + case TargetInstrInfo::MachineOutlinerInstrType::Legal: + mapToLegalUnsigned(It); + break; - case TargetInstrInfo::MachineOutlinerInstrType::Invisible: - break; + case TargetInstrInfo::MachineOutlinerInstrType::Invisible: + break; } } @@ -804,9 +751,9 @@ struct InstructionMapper { // Make sure that the implementation of DenseMapInfo<unsigned> hasn't // changed. assert(DenseMapInfo<unsigned>::getEmptyKey() == (unsigned)-1 && - "DenseMapInfo<unsigned>'s empty key isn't -1!"); + "DenseMapInfo<unsigned>'s empty key isn't -1!"); assert(DenseMapInfo<unsigned>::getTombstoneKey() == (unsigned)-2 && - "DenseMapInfo<unsigned>'s tombstone key isn't -2!"); + "DenseMapInfo<unsigned>'s tombstone key isn't -2!"); } }; @@ -823,6 +770,10 @@ struct MachineOutliner : public ModulePass { static char ID; + /// \brief Set to true if the outliner should consider functions with + /// linkonceodr linkage. + bool OutlineFromLinkOnceODRs = false; + StringRef getPassName() const override { return "Machine Outliner"; } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -832,10 +783,35 @@ struct MachineOutliner : public ModulePass { ModulePass::getAnalysisUsage(AU); } - MachineOutliner() : ModulePass(ID) { + MachineOutliner(bool OutlineFromLinkOnceODRs = false) + : ModulePass(ID), OutlineFromLinkOnceODRs(OutlineFromLinkOnceODRs) { initializeMachineOutlinerPass(*PassRegistry::getPassRegistry()); } + /// Find all repeated substrings that satisfy the outlining cost model. 
+ /// + /// If a substring appears at least twice, then it must be represented by + /// an internal node which appears in at least two suffixes. Each suffix is + /// represented by a leaf node. To do this, we visit each internal node in + /// the tree, using the leaf children of each internal node. If an internal + /// node represents a beneficial substring, then we use each of its leaf + /// children to find the locations of its substring. + /// + /// \param ST A suffix tree to query. + /// \param TII TargetInstrInfo for the target. + /// \param Mapper Contains outlining mapping information. + /// \param[out] CandidateList Filled with candidates representing each + /// beneficial substring. + /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions each + /// type of candidate. + /// + /// \returns The length of the longest candidate found. + unsigned + findCandidates(SuffixTree &ST, const TargetInstrInfo &TII, + InstructionMapper &Mapper, + std::vector<std::shared_ptr<Candidate>> &CandidateList, + std::vector<OutlinedFunction> &FunctionList); + /// \brief Replace the sequences of instructions represented by the /// \p Candidates in \p CandidateList with calls to \p MachineFunctions /// described in \p FunctionList. @@ -844,7 +820,8 @@ struct MachineOutliner : public ModulePass { /// \param CandidateList A list of candidates to be outlined. /// \param FunctionList A list of functions to be inserted into the module. /// \param Mapper Contains the instruction mappings for the module. - bool outline(Module &M, const ArrayRef<Candidate> &CandidateList, + bool outline(Module &M, + const ArrayRef<std::shared_ptr<Candidate>> &CandidateList, std::vector<OutlinedFunction> &FunctionList, InstructionMapper &Mapper); @@ -865,11 +842,15 @@ struct MachineOutliner : public ModulePass { /// \param TII TargetInstrInfo for the module. /// /// \returns The length of the longest candidate found. 0 if there are none. 
- unsigned buildCandidateList(std::vector<Candidate> &CandidateList, - std::vector<OutlinedFunction> &FunctionList, - SuffixTree &ST, - InstructionMapper &Mapper, - const TargetInstrInfo &TII); + unsigned + buildCandidateList(std::vector<std::shared_ptr<Candidate>> &CandidateList, + std::vector<OutlinedFunction> &FunctionList, + SuffixTree &ST, InstructionMapper &Mapper, + const TargetInstrInfo &TII); + + /// Helper function for pruneOverlaps. + /// Removes \p C from the candidate list, and updates its \p OutlinedFunction. + void prune(Candidate &C, std::vector<OutlinedFunction> &FunctionList); /// \brief Remove any overlapping candidates that weren't handled by the /// suffix tree's pruning method. @@ -881,11 +862,12 @@ struct MachineOutliner : public ModulePass { /// /// \param[in,out] CandidateList A list of outlining candidates. /// \param[in,out] FunctionList A list of functions to be outlined. + /// \param Mapper Contains instruction mapping info for outlining. /// \param MaxCandidateLen The length of the longest candidate. /// \param TII TargetInstrInfo for the module. 
- void pruneOverlaps(std::vector<Candidate> &CandidateList, + void pruneOverlaps(std::vector<std::shared_ptr<Candidate>> &CandidateList, std::vector<OutlinedFunction> &FunctionList, - unsigned MaxCandidateLen, + InstructionMapper &Mapper, unsigned MaxCandidateLen, const TargetInstrInfo &TII); /// Construct a suffix tree on the instructions in \p M and outline repeated @@ -898,16 +880,223 @@ struct MachineOutliner : public ModulePass { char MachineOutliner::ID = 0; namespace llvm { -ModulePass *createMachineOutlinerPass() { return new MachineOutliner(); } +ModulePass *createMachineOutlinerPass(bool OutlineFromLinkOnceODRs) { + return new MachineOutliner(OutlineFromLinkOnceODRs); +} + +} // namespace llvm + +INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false, + false) + +unsigned MachineOutliner::findCandidates( + SuffixTree &ST, const TargetInstrInfo &TII, InstructionMapper &Mapper, + std::vector<std::shared_ptr<Candidate>> &CandidateList, + std::vector<OutlinedFunction> &FunctionList) { + CandidateList.clear(); + FunctionList.clear(); + unsigned MaxLen = 0; + + // FIXME: Visit internal nodes instead of leaves. + for (SuffixTreeNode *Leaf : ST.LeafVector) { + assert(Leaf && "Leaves in LeafVector cannot be null!"); + if (!Leaf->IsInTree) + continue; + + assert(Leaf->Parent && "All leaves must have parents!"); + SuffixTreeNode &Parent = *(Leaf->Parent); + + // If it doesn't appear enough, or we already outlined from it, skip it. + if (Parent.OccurrenceCount < 2 || Parent.isRoot() || !Parent.IsInTree) + continue; + + // Figure out if this candidate is beneficial. + unsigned StringLen = Leaf->ConcatLen - (unsigned)Leaf->size(); + + // Too short to be beneficial; skip it. + // FIXME: This isn't necessarily true for, say, X86. If we factor in + // instruction lengths we need more information than this. + if (StringLen < 2) + continue; + + // If this is a beneficial class of candidate, then every one is stored in + // this vector. 
+ std::vector<Candidate> CandidatesForRepeatedSeq; + + // Describes the start and end point of each candidate. This allows the + // target to infer some information about each occurrence of each repeated + // sequence. + // FIXME: CandidatesForRepeatedSeq and this should be combined. + std::vector< + std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>> + RepeatedSequenceLocs; + + // Figure out the call overhead for each instance of the sequence. + for (auto &ChildPair : Parent.Children) { + SuffixTreeNode *M = ChildPair.second; + + if (M && M->IsInTree && M->isLeaf()) { + // Never visit this leaf again. + M->IsInTree = false; + unsigned StartIdx = M->SuffixIdx; + unsigned EndIdx = StartIdx + StringLen - 1; + + // Trick: Discard some candidates that would be incompatible with the + // ones we've already found for this sequence. This will save us some + // work in candidate selection. + // + // If two candidates overlap, then we can't outline them both. This + // happens when we have candidates that look like, say + // + // AA (where each "A" is an instruction). + // + // We might have some portion of the module that looks like this: + // AAAAAA (6 A's) + // + // In this case, there are 5 different copies of "AA" in this range, but + // at most 3 can be outlined. If only outlining 3 of these is going to + // be unbeneficial, then we ought to not bother. + // + // Note that two things DON'T overlap when they look like this: + // start1...end1 .... start2...end2 + // That is, one must either + // * End before the other starts + // * Start after the other ends + if (std::all_of(CandidatesForRepeatedSeq.begin(), + CandidatesForRepeatedSeq.end(), + [&StartIdx, &EndIdx](const Candidate &C) { + return (EndIdx < C.getStartIdx() || + StartIdx > C.getEndIdx()); + })) { + // It doesn't overlap with anything, so we can outline it. + // Each sequence is over [StartIt, EndIt]. 
+ MachineBasicBlock::iterator StartIt = Mapper.InstrList[StartIdx]; + MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx]; + + // Save the candidate and its location. + CandidatesForRepeatedSeq.emplace_back(StartIdx, StringLen, + FunctionList.size()); + RepeatedSequenceLocs.emplace_back(std::make_pair(StartIt, EndIt)); + } + } + } + + // We've found something we might want to outline. + // Create an OutlinedFunction to store it and check if it'd be beneficial + // to outline. + TargetInstrInfo::MachineOutlinerInfo MInfo = + TII.getOutlininingCandidateInfo(RepeatedSequenceLocs); + std::vector<unsigned> Seq; + for (unsigned i = Leaf->SuffixIdx; i < Leaf->SuffixIdx + StringLen; i++) + Seq.push_back(ST.Str[i]); + OutlinedFunction OF(FunctionList.size(), CandidatesForRepeatedSeq.size(), + Seq, MInfo); + unsigned Benefit = OF.getBenefit(); + + // Is it better to outline this candidate than not? + if (Benefit < 1) { + // Outlining this candidate would take more instructions than not + // outlining. + // Emit a remark explaining why we didn't outline this candidate. + std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator> C = + RepeatedSequenceLocs[0]; + MachineOptimizationRemarkEmitter MORE( + *(C.first->getParent()->getParent()), nullptr); + MORE.emit([&]() { + MachineOptimizationRemarkMissed R(DEBUG_TYPE, "NotOutliningCheaper", + C.first->getDebugLoc(), + C.first->getParent()); + R << "Did not outline " << NV("Length", StringLen) << " instructions" + << " from " << NV("NumOccurrences", RepeatedSequenceLocs.size()) + << " locations." + << " Instructions from outlining all occurrences (" + << NV("OutliningCost", OF.getOutliningCost()) << ")" + << " >= Unoutlined instruction count (" + << NV("NotOutliningCost", StringLen * OF.getOccurrenceCount()) << ")" + << " (Also found at: "; + + // Tell the user the other places the candidate was found. 
+ for (unsigned i = 1, e = RepeatedSequenceLocs.size(); i < e; i++) { + R << NV((Twine("OtherStartLoc") + Twine(i)).str(), + RepeatedSequenceLocs[i].first->getDebugLoc()); + if (i != e - 1) + R << ", "; + } + + R << ")"; + return R; + }); + + // Move to the next candidate. + continue; + } + + if (StringLen > MaxLen) + MaxLen = StringLen; + + // At this point, the candidate class is seen as beneficial. Set their + // benefit values and save them in the candidate list. + std::vector<std::shared_ptr<Candidate>> CandidatesForFn; + for (Candidate &C : CandidatesForRepeatedSeq) { + C.Benefit = Benefit; + C.MInfo = MInfo; + std::shared_ptr<Candidate> Cptr = std::make_shared<Candidate>(C); + CandidateList.push_back(Cptr); + CandidatesForFn.push_back(Cptr); + } + + FunctionList.push_back(OF); + FunctionList.back().Candidates = CandidatesForFn; + + // Move to the next function. + Parent.IsInTree = false; + } + + return MaxLen; +} + +// Remove C from the candidate space, and update its OutlinedFunction. +void MachineOutliner::prune(Candidate &C, + std::vector<OutlinedFunction> &FunctionList) { + // Get the OutlinedFunction associated with this Candidate. + OutlinedFunction &F = FunctionList[C.FunctionIdx]; + + // Update C's associated function's occurrence count. + F.decrement(); + + // Remove C from the CandidateList. + C.InCandidateList = false; + + DEBUG(dbgs() << "- Removed a Candidate \n"; + dbgs() << "--- Num fns left for candidate: " << F.getOccurrenceCount() + << "\n"; + dbgs() << "--- Candidate's functions's benefit: " << F.getBenefit() + << "\n";); } -INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, - "Machine Function Outliner", false, false) +void MachineOutliner::pruneOverlaps( + std::vector<std::shared_ptr<Candidate>> &CandidateList, + std::vector<OutlinedFunction> &FunctionList, InstructionMapper &Mapper, + unsigned MaxCandidateLen, const TargetInstrInfo &TII) { + + // Return true if this candidate became unbeneficial for outlining in a + // previous step. 
+ auto ShouldSkipCandidate = [&FunctionList, this](Candidate &C) { + + // Check if the candidate was removed in a previous step. + if (!C.InCandidateList) + return true; + + // C must be alive. Check if we should remove it. + if (FunctionList[C.FunctionIdx].getBenefit() < 1) { + prune(C, FunctionList); + return true; + } + + // C is in the list, and F is still beneficial. + return false; + }; -void MachineOutliner::pruneOverlaps(std::vector<Candidate> &CandidateList, - std::vector<OutlinedFunction> &FunctionList, - unsigned MaxCandidateLen, - const TargetInstrInfo &TII) { // TODO: Experiment with interval trees or other interval-checking structures // to lower the time complexity of this function. // TODO: Can we do better than the simple greedy choice? @@ -915,56 +1104,35 @@ void MachineOutliner::pruneOverlaps(std::vector<Candidate> &CandidateList, // This is O(MaxCandidateLen * CandidateList.size()). for (auto It = CandidateList.begin(), Et = CandidateList.end(); It != Et; It++) { - Candidate &C1 = *It; - OutlinedFunction &F1 = FunctionList[C1.FunctionIdx]; + Candidate &C1 = **It; - // If we removed this candidate, skip it. - if (!C1.InCandidateList) + // If C1 was already pruned, or its function is no longer beneficial for + // outlining, move to the next candidate. + if (ShouldSkipCandidate(C1)) continue; - // Is it still worth it to outline C1? - if (F1.Benefit < 1 || F1.OccurrenceCount < 2) { - assert(F1.OccurrenceCount > 0 && - "Can't remove OutlinedFunction with no occurrences!"); - F1.OccurrenceCount--; - C1.InCandidateList = false; - continue; - } - // The minimum start index of any candidate that could overlap with this // one. unsigned FarthestPossibleIdx = 0; // Either the index is 0, or it's at most MaxCandidateLen indices away. 
- if (C1.StartIdx > MaxCandidateLen) - FarthestPossibleIdx = C1.StartIdx - MaxCandidateLen; + if (C1.getStartIdx() > MaxCandidateLen) + FarthestPossibleIdx = C1.getStartIdx() - MaxCandidateLen; // Compare against the candidates in the list that start at at most // FarthestPossibleIdx indices away from C1. There are at most // MaxCandidateLen of these. for (auto Sit = It + 1; Sit != Et; Sit++) { - Candidate &C2 = *Sit; - OutlinedFunction &F2 = FunctionList[C2.FunctionIdx]; + Candidate &C2 = **Sit; // Is this candidate too far away to overlap? - if (C2.StartIdx < FarthestPossibleIdx) + if (C2.getStartIdx() < FarthestPossibleIdx) break; - // Did we already remove this candidate in a previous step? - if (!C2.InCandidateList) - continue; - - // Is the function beneficial to outline? - if (F2.OccurrenceCount < 2 || F2.Benefit < 1) { - // If not, remove this candidate and move to the next one. - assert(F2.OccurrenceCount > 0 && - "Can't remove OutlinedFunction with no occurrences!"); - F2.OccurrenceCount--; - C2.InCandidateList = false; + // If C2 was already pruned, or its function is no longer beneficial for + // outlining, move to the next candidate. + if (ShouldSkipCandidate(C2)) continue; - } - - size_t C2End = C2.StartIdx + C2.Len - 1; // Do C1 and C2 overlap? // @@ -974,7 +1142,7 @@ void MachineOutliner::pruneOverlaps(std::vector<Candidate> &CandidateList, // We sorted our candidate list so C2Start <= C1Start. We know that // C2End > C2Start since each candidate has length >= 2. Therefore, all we // have to check is C2End < C2Start to see if we overlap. - if (C2End < C1.StartIdx) + if (C2.getEndIdx() < C1.getStartIdx()) continue; // C1 and C2 overlap. @@ -982,118 +1150,52 @@ void MachineOutliner::pruneOverlaps(std::vector<Candidate> &CandidateList, // // Approximate this by picking the one which would have saved us the // most instructions before any pruning. 
- if (C1.Benefit >= C2.Benefit) { - - // C1 is better, so remove C2 and update C2's OutlinedFunction to - // reflect the removal. - assert(F2.OccurrenceCount > 0 && - "Can't remove OutlinedFunction with no occurrences!"); - F2.OccurrenceCount--; - F2.Benefit = TII.getOutliningBenefit(F2.Sequence.size(), - F2.OccurrenceCount, - F2.IsTailCall - ); - - C2.InCandidateList = false; - - DEBUG ( - dbgs() << "- Removed C2. \n"; - dbgs() << "--- Num fns left for C2: " << F2.OccurrenceCount << "\n"; - dbgs() << "--- C2's benefit: " << F2.Benefit << "\n"; - ); - } else { - // C2 is better, so remove C1 and update C1's OutlinedFunction to - // reflect the removal. - assert(F1.OccurrenceCount > 0 && - "Can't remove OutlinedFunction with no occurrences!"); - F1.OccurrenceCount--; - F1.Benefit = TII.getOutliningBenefit(F1.Sequence.size(), - F1.OccurrenceCount, - F1.IsTailCall - ); - C1.InCandidateList = false; - - DEBUG ( - dbgs() << "- Removed C1. \n"; - dbgs() << "--- Num fns left for C1: " << F1.OccurrenceCount << "\n"; - dbgs() << "--- C1's benefit: " << F1.Benefit << "\n"; - ); - - // C1 is out, so we don't have to compare it against anyone else. + // Is C2 a better candidate? + if (C2.Benefit > C1.Benefit) { + // Yes, so prune C1. Since C1 is dead, we don't have to compare it + // against anything anymore, so break. + prune(C1, FunctionList); break; } + + // Prune C2 and move on to the next candidate. + prune(C2, FunctionList); } } } -unsigned -MachineOutliner::buildCandidateList(std::vector<Candidate> &CandidateList, - std::vector<OutlinedFunction> &FunctionList, - SuffixTree &ST, - InstructionMapper &Mapper, - const TargetInstrInfo &TII) { +unsigned MachineOutliner::buildCandidateList( + std::vector<std::shared_ptr<Candidate>> &CandidateList, + std::vector<OutlinedFunction> &FunctionList, SuffixTree &ST, + InstructionMapper &Mapper, const TargetInstrInfo &TII) { std::vector<unsigned> CandidateSequence; // Current outlining candidate. 
- size_t MaxCandidateLen = 0; // Length of the longest candidate. - - // Function for maximizing query in the suffix tree. - // This allows us to define more fine-grained types of things to outline in - // the target without putting target-specific info in the suffix tree. - auto BenefitFn = [&TII, &Mapper](const SuffixTreeNode &Curr, - size_t StringLen, unsigned EndVal) { - - // The root represents the empty string. - if (Curr.isRoot()) - return 0u; - - // Is this long enough to outline? - // TODO: Let the target decide how "long" a string is in terms of the sizes - // of the instructions in the string. For example, if a call instruction - // is smaller than a one instruction string, we should outline that string. - if (StringLen < 2) - return 0u; - - size_t Occurrences = Curr.OccurrenceCount; + unsigned MaxCandidateLen = 0; // Length of the longest candidate. - // Anything we want to outline has to appear at least twice. - if (Occurrences < 2) - return 0u; - - // Check if the last instruction in the sequence is a return. - MachineInstr *LastInstr = - Mapper.IntegerInstructionMap[EndVal]; - assert(LastInstr && "Last instruction in sequence was unmapped!"); - - // The only way a terminator could be mapped as legal is if it was safe to - // tail call. - bool IsTailCall = LastInstr->isTerminator(); - return TII.getOutliningBenefit(StringLen, Occurrences, IsTailCall); - }; - - MaxCandidateLen = ST.findCandidates(CandidateList, FunctionList, BenefitFn); - - for (auto &OF : FunctionList) - OF.IsTailCall = Mapper. - IntegerInstructionMap[OF.Sequence.back()]->isTerminator(); + MaxCandidateLen = + findCandidates(ST, TII, Mapper, CandidateList, FunctionList); // Sort the candidates in decending order. This will simplify the outlining // process when we have to remove the candidates from the mapping by // allowing us to cut them out without keeping track of an offset. 
- std::stable_sort(CandidateList.begin(), CandidateList.end()); + std::stable_sort( + CandidateList.begin(), CandidateList.end(), + [](const std::shared_ptr<Candidate> &LHS, + const std::shared_ptr<Candidate> &RHS) { return *LHS < *RHS; }); return MaxCandidateLen; } MachineFunction * MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF, - InstructionMapper &Mapper) { + InstructionMapper &Mapper) { // Create the function name. This should be unique. For now, just hash the // module name and include it in the function name plus the number of this // function. std::ostringstream NameStream; - NameStream << "OUTLINED_FUNCTION" << "_" << OF.Name; + NameStream << "OUTLINED_FUNCTION_" << OF.Name; // Create the function using an IR-level function. LLVMContext &C = M.getContext(); @@ -1119,7 +1221,7 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF, // Insert the new function into the module. MF.insert(MF.begin(), &MBB); - TII.insertOutlinerPrologue(MBB, MF, OF.IsTailCall); + TII.insertOutlinerPrologue(MBB, MF, OF.MInfo); // Copy over the instructions for the function using the integer mappings in // its sequence. @@ -1134,21 +1236,19 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF, MBB.insert(MBB.end(), NewMI); } - TII.insertOutlinerEpilogue(MBB, MF, OF.IsTailCall); + TII.insertOutlinerEpilogue(MBB, MF, OF.MInfo); return &MF; } -bool MachineOutliner::outline(Module &M, - const ArrayRef<Candidate> &CandidateList, - std::vector<OutlinedFunction> &FunctionList, - InstructionMapper &Mapper) { +bool MachineOutliner::outline( + Module &M, const ArrayRef<std::shared_ptr<Candidate>> &CandidateList, + std::vector<OutlinedFunction> &FunctionList, InstructionMapper &Mapper) { bool OutlinedSomething = false; - // Replace the candidates with calls to their respective outlined functions. 
- for (const Candidate &C : CandidateList) { - + for (const std::shared_ptr<Candidate> &Cptr : CandidateList) { + Candidate &C = *Cptr; // Was the candidate removed during pruneOverlaps? if (!C.InCandidateList) continue; @@ -1157,14 +1257,15 @@ bool MachineOutliner::outline(Module &M, OutlinedFunction &OF = FunctionList[C.FunctionIdx]; // Was its OutlinedFunction made unbeneficial during pruneOverlaps? - if (OF.OccurrenceCount < 2 || OF.Benefit < 1) + if (OF.getBenefit() < 1) continue; // If not, then outline it. - assert(C.StartIdx < Mapper.InstrList.size() && "Candidate out of bounds!"); - MachineBasicBlock *MBB = (*Mapper.InstrList[C.StartIdx]).getParent(); - MachineBasicBlock::iterator StartIt = Mapper.InstrList[C.StartIdx]; - unsigned EndIdx = C.StartIdx + C.Len - 1; + assert(C.getStartIdx() < Mapper.InstrList.size() && + "Candidate out of bounds!"); + MachineBasicBlock *MBB = (*Mapper.InstrList[C.getStartIdx()]).getParent(); + MachineBasicBlock::iterator StartIt = Mapper.InstrList[C.getStartIdx()]; + unsigned EndIdx = C.getEndIdx(); assert(EndIdx < Mapper.InstrList.size() && "Candidate out of bounds!"); MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx]; @@ -1175,6 +1276,37 @@ bool MachineOutliner::outline(Module &M, // Does this candidate have a function yet? if (!OF.MF) { OF.MF = createOutlinedFunction(M, OF, Mapper); + MachineBasicBlock *MBB = &*OF.MF->begin(); + + // Output a remark telling the user that an outlined function was created, + // and explaining where it came from. + MachineOptimizationRemarkEmitter MORE(*OF.MF, nullptr); + MachineOptimizationRemark R(DEBUG_TYPE, "OutlinedFunction", + MBB->findDebugLoc(MBB->begin()), MBB); + R << "Saved " << NV("OutliningBenefit", OF.getBenefit()) + << " instructions by " + << "outlining " << NV("Length", OF.Sequence.size()) << " instructions " + << "from " << NV("NumOccurrences", OF.getOccurrenceCount()) + << " locations. 
" + << "(Found at: "; + + // Tell the user the other places the candidate was found. + for (size_t i = 0, e = OF.Candidates.size(); i < e; i++) { + + // Skip over things that were pruned. + if (!OF.Candidates[i]->InCandidateList) + continue; + + R << NV( + (Twine("StartLoc") + Twine(i)).str(), + Mapper.InstrList[OF.Candidates[i]->getStartIdx()]->getDebugLoc()); + if (i != e - 1) + R << ", "; + } + + R << ")"; + + MORE.emit(R); FunctionsCreated++; } @@ -1183,8 +1315,8 @@ bool MachineOutliner::outline(Module &M, const TargetInstrInfo &TII = *STI.getInstrInfo(); // Insert a call to the new function and erase the old sequence. - TII.insertOutlinedCall(M, *MBB, StartIt, *MF, OF.IsTailCall); - StartIt = Mapper.InstrList[C.StartIdx]; + TII.insertOutlinedCall(M, *MBB, StartIt, *MF, C.MInfo); + StartIt = Mapper.InstrList[C.getStartIdx()]; MBB->erase(StartIt, EndIt); OutlinedSomething = true; @@ -1193,9 +1325,7 @@ bool MachineOutliner::outline(Module &M, NumOutlined++; } - DEBUG ( - dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n"; - ); + DEBUG(dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n";); return OutlinedSomething; } @@ -1207,8 +1337,8 @@ bool MachineOutliner::runOnModule(Module &M) { return false; MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>(); - const TargetSubtargetInfo &STI = MMI.getOrCreateMachineFunction(*M.begin()) - .getSubtarget(); + const TargetSubtargetInfo &STI = + MMI.getOrCreateMachineFunction(*M.begin()).getSubtarget(); const TargetRegisterInfo *TRI = STI.getRegisterInfo(); const TargetInstrInfo *TII = STI.getInstrInfo(); @@ -1219,7 +1349,8 @@ bool MachineOutliner::runOnModule(Module &M) { MachineFunction &MF = MMI.getOrCreateMachineFunction(F); // Is the function empty? Safe to outline from? - if (F.empty() || !TII->isFunctionSafeToOutlineFrom(MF)) + if (F.empty() || + !TII->isFunctionSafeToOutlineFrom(MF, OutlineFromLinkOnceODRs)) continue; // If it is, look at each MachineBasicBlock in the function. 
@@ -1236,7 +1367,7 @@ bool MachineOutliner::runOnModule(Module &M) { // Construct a suffix tree, use it to find candidates, and then outline them. SuffixTree ST(Mapper.UnsignedVec); - std::vector<Candidate> CandidateList; + std::vector<std::shared_ptr<Candidate>> CandidateList; std::vector<OutlinedFunction> FunctionList; // Find all of the outlining candidates. @@ -1244,7 +1375,7 @@ bool MachineOutliner::runOnModule(Module &M) { buildCandidateList(CandidateList, FunctionList, ST, Mapper, *TII); // Remove candidates that overlap with other candidates. - pruneOverlaps(CandidateList, FunctionList, MaxCandidateLen, *TII); + pruneOverlaps(CandidateList, FunctionList, Mapper, MaxCandidateLen, *TII); // Outline each of the candidates and return true if something was outlined. return outline(M, CandidateList, FunctionList, Mapper); diff --git a/lib/CodeGen/MachinePipeliner.cpp b/lib/CodeGen/MachinePipeliner.cpp index 19e9a50e2c43..18cb9af499a6 100644 --- a/lib/CodeGen/MachinePipeliner.cpp +++ b/lib/CodeGen/MachinePipeliner.cpp @@ -1,4 +1,4 @@ -//===-- MachinePipeliner.cpp - Machine Software Pipeliner Pass ------------===// +//===- MachinePipeliner.cpp - Machine Software Pipeliner Pass -------------===// // // The LLVM Compiler Infrastructure // @@ -73,14 +73,13 @@ #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/DFAPacketizer.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" @@ -90,19 +89,23 @@ #include "llvm/CodeGen/ScheduleDAG.h" #include 
"llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/ScheduleDAGMutation.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Function.h" +#include "llvm/MC/LaneBitmask.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrItineraries.h" -#include "llvm/PassAnalysisSupport.h" -#include "llvm/PassRegistry.h" -#include "llvm/PassSupport.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cassert> #include <climits> @@ -111,6 +114,7 @@ #include <functional> #include <iterator> #include <map> +#include <memory> #include <tuple> #include <utility> #include <vector> @@ -169,7 +173,6 @@ namespace { class NodeSet; class SMSchedule; -class SwingSchedulerDAG; /// The main class in the implementation of the target independent /// software pipeliner pass. @@ -185,6 +188,7 @@ public: #ifndef NDEBUG static int NumTries; #endif + /// Cache the target analysis information about the loop. struct LoopInfo { MachineBasicBlock *TBB = nullptr; @@ -196,6 +200,7 @@ public: LoopInfo LI; static char ID; + MachinePipeliner() : MachineFunctionPass(ID) { initializeMachinePipelinerPass(*PassRegistry::getPassRegistry()); } @@ -222,9 +227,9 @@ private: class SwingSchedulerDAG : public ScheduleDAGInstrs { MachinePipeliner &Pass; /// The minimum initiation interval between iterations for this schedule. - unsigned MII; + unsigned MII = 0; /// Set to true if a valid pipelined schedule is found for the loop. 
- bool Scheduled; + bool Scheduled = false; MachineLoop &Loop; LiveIntervals &LIS; const RegisterClassInfo &RegClassInfo; @@ -234,9 +239,10 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs { ScheduleDAGTopologicalSort Topo; struct NodeInfo { - int ASAP; - int ALAP; - NodeInfo() : ASAP(0), ALAP(0) {} + int ASAP = 0; + int ALAP = 0; + + NodeInfo() = default; }; /// Computed properties for each node in the graph. std::vector<NodeInfo> ScheduleInfo; @@ -245,10 +251,10 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs { /// Computed node ordering for scheduling. SetVector<SUnit *> NodeOrder; - typedef SmallVector<NodeSet, 8> NodeSetType; - typedef DenseMap<unsigned, unsigned> ValueMapTy; - typedef SmallVectorImpl<MachineBasicBlock *> MBBVectorTy; - typedef DenseMap<MachineInstr *, MachineInstr *> InstrMapTy; + using NodeSetType = SmallVector<NodeSet, 8>; + using ValueMapTy = DenseMap<unsigned, unsigned>; + using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>; + using InstrMapTy = DenseMap<MachineInstr *, MachineInstr *>; /// Instructions to change when emitting the final schedule. DenseMap<SUnit *, std::pair<unsigned, int64_t>> InstrChanges; @@ -272,8 +278,8 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs { public: Circuits(std::vector<SUnit> &SUs) - : SUnits(SUs), Stack(), Blocked(SUs.size()), B(SUs.size()), - AdjK(SUs.size()) {} + : SUnits(SUs), Blocked(SUs.size()), B(SUs.size()), AdjK(SUs.size()) {} + /// Reset the data structures used in the circuit algorithm. 
void reset() { Stack.clear(); @@ -281,6 +287,7 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs { B.assign(SUnits.size(), SmallPtrSet<SUnit *, 4>()); NumPaths = 0; } + void createAdjacencyStructure(SwingSchedulerDAG *DAG); bool circuit(int V, int S, NodeSetType &NodeSets, bool HasBackedge = false); void unblock(int U); @@ -289,9 +296,8 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs { public: SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis, const RegisterClassInfo &rci) - : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), MII(0), - Scheduled(false), Loop(L), LIS(lis), RegClassInfo(rci), - Topo(SUnits, &ExitSU) { + : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis), + RegClassInfo(rci), Topo(SUnits, &ExitSU) { P.MF->getSubtarget().getSMSMutations(Mutations); } @@ -363,8 +369,9 @@ public: /// Set the Minimum Initiation Interval for this schedule attempt. void setMII(unsigned mii) { MII = mii; } - MachineInstr *applyInstrChange(MachineInstr *MI, SMSchedule &Schedule, - bool UpdateDAG = false); + void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule); + + void fixupRegisterOverlaps(std::deque<SUnit *> &Instrs); /// Return the new base register that was stored away for the changed /// instruction. @@ -455,7 +462,7 @@ private: /// that assigns a priority to the set. class NodeSet { SetVector<SUnit *> Nodes; - bool HasRecurrence; + bool HasRecurrence = false; unsigned RecMII = 0; int MaxMOV = 0; int MaxDepth = 0; @@ -463,10 +470,9 @@ class NodeSet { SUnit *ExceedPressure = nullptr; public: - typedef SetVector<SUnit *>::const_iterator iterator; - - NodeSet() : Nodes(), HasRecurrence(false) {} + using iterator = SetVector<SUnit *>::const_iterator; + NodeSet() = default; NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) {} bool insert(SUnit *SU) { return Nodes.insert(SU); } @@ -581,13 +587,13 @@ private: /// Keep track of the first cycle value in the schedule. 
It starts /// as zero, but the algorithm allows negative values. - int FirstCycle; + int FirstCycle = 0; /// Keep track of the last cycle value in the schedule. - int LastCycle; + int LastCycle = 0; /// The initiation interval (II) for the schedule. - int InitiationInterval; + int InitiationInterval = 0; /// Target machine information. const TargetSubtargetInfo &ST; @@ -600,11 +606,7 @@ private: public: SMSchedule(MachineFunction *mf) : ST(mf->getSubtarget()), MRI(mf->getRegInfo()), - Resources(ST.getInstrInfo()->CreateTargetScheduleState(ST)) { - FirstCycle = 0; - LastCycle = 0; - InitiationInterval = 0; - } + Resources(ST.getInstrInfo()->CreateTargetScheduleState(ST)) {} void reset() { ScheduledInstrs.clear(); @@ -638,9 +640,9 @@ public: bool insert(SUnit *SU, int StartCycle, int EndCycle, int II); /// Iterators for the cycle to instruction map. - typedef DenseMap<int, std::deque<SUnit *>>::iterator sched_iterator; - typedef DenseMap<int, std::deque<SUnit *>>::const_iterator - const_sched_iterator; + using sched_iterator = DenseMap<int, std::deque<SUnit *>>::iterator; + using const_sched_iterator = + DenseMap<int, std::deque<SUnit *>>::const_iterator; /// Return true if the instruction is scheduled at the specified stage. bool isScheduledAtStage(SUnit *SU, unsigned StageNum) { @@ -715,6 +717,7 @@ char MachinePipeliner::ID = 0; int MachinePipeliner::NumTries = 0; #endif char &llvm::MachinePipelinerID = MachinePipeliner::ID; + INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE, "Modulo Software Pipelining", false, false) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) @@ -726,13 +729,13 @@ INITIALIZE_PASS_END(MachinePipeliner, DEBUG_TYPE, /// The "main" function for implementing Swing Modulo Scheduling. 
bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) { - if (skipFunction(*mf.getFunction())) + if (skipFunction(mf.getFunction())) return false; if (!EnableSWP) return false; - if (mf.getFunction()->getAttributes().hasAttribute( + if (mf.getFunction().getAttributes().hasAttribute( AttributeList::FunctionIndex, Attribute::OptimizeForSize) && !EnableSWPOptSize.getPosition()) return false; @@ -1256,6 +1259,8 @@ struct FuncUnitSorter { const InstrItineraryData *InstrItins; DenseMap<unsigned, unsigned> Resources; + FuncUnitSorter(const InstrItineraryData *IID) : InstrItins(IID) {} + // Compute the number of functional unit alternatives needed // at each stage, and take the minimum value. We prioritize the // instructions by the least number of choices first. @@ -1291,7 +1296,6 @@ struct FuncUnitSorter { } } - FuncUnitSorter(const InstrItineraryData *IID) : InstrItins(IID) {} /// Return true if IS1 has less priority than IS2. bool operator()(const MachineInstr *IS1, const MachineInstr *IS2) const { unsigned F1 = 0, F2 = 0; @@ -1384,7 +1388,7 @@ unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) { unsigned RecMII = 0; for (NodeSet &Nodes : NodeSets) { - if (Nodes.size() == 0) + if (Nodes.empty()) continue; unsigned Delay = Nodes.size() - 1; @@ -1554,7 +1558,6 @@ static bool ignoreDependence(const SDep &D, bool isPred) { /// D - Depth of each node. /// H - Height of each node. void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) { - ScheduleInfo.resize(SUnits.size()); DEBUG({ @@ -1651,7 +1654,7 @@ static bool pred_L(SetVector<SUnit *> &NodeOrder, Preds.insert(IS->getSUnit()); } } - return Preds.size() > 0; + return !Preds.empty(); } /// Compute the Succ_L(O) set, as defined in the paper. 
The set is defined @@ -1683,7 +1686,7 @@ static bool succ_L(SetVector<SUnit *> &NodeOrder, Succs.insert(PI->getSUnit()); } } - return Succs.size() > 0; + return !Succs.empty(); } /// Return true if there is a path from the specified node to any of the nodes @@ -1868,7 +1871,7 @@ void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) { Visited.clear(); computePath(NI, Path, NodesAdded, I, Visited); } - if (Path.size() > 0) + if (!Path.empty()) I.insert(Path.begin(), Path.end()); } // Add the nodes from the previous node set to the current node set. @@ -1879,7 +1882,7 @@ void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) { Visited.clear(); computePath(NI, Path, I, NodesAdded, Visited); } - if (Path.size() > 0) + if (!Path.empty()) I.insert(Path.begin(), Path.end()); } NodesAdded.insert(I.begin(), I.end()); @@ -1892,7 +1895,7 @@ void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) { if (succ_L(NodesAdded, N)) for (SUnit *I : N) addConnectedNodes(I, NewSet, NodesAdded); - if (NewSet.size() > 0) + if (!NewSet.empty()) NodeSets.push_back(NewSet); // Create a new node set with the connected nodes of any predecessor of a node @@ -1901,7 +1904,7 @@ void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) { if (pred_L(NodesAdded, N)) for (SUnit *I : N) addConnectedNodes(I, NewSet, NodesAdded); - if (NewSet.size() > 0) + if (!NewSet.empty()) NodeSets.push_back(NewSet); // Create new nodes sets with the connected nodes any any remaining node that @@ -1911,7 +1914,7 @@ void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) { if (NodesAdded.count(SU) == 0) { NewSet.clear(); addConnectedNodes(SU, NewSet, NodesAdded); - if (NewSet.size() > 0) + if (!NewSet.empty()) NodeSets.push_back(NewSet); } } @@ -1976,7 +1979,7 @@ void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) { for (NodeSetType::iterator J = I + 1; J != E;) { J->remove_if([&](SUnit *SUJ) { return I->count(SUJ); }); - if (J->size() == 0) { + 
if (J->empty()) { NodeSets.erase(J); E = NodeSets.end(); } else { @@ -2147,8 +2150,7 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) { /// Process the nodes in the computed order and create the pipelined schedule /// of the instructions, if possible. Return true if a schedule is found. bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) { - - if (NodeOrder.size() == 0) + if (NodeOrder.empty()) return false; bool scheduleFound = false; @@ -2325,7 +2327,7 @@ void SwingSchedulerDAG::generateProlog(SMSchedule &Schedule, unsigned LastStage, ValueMapTy *VRMap, MBBVectorTy &PrologBBs) { MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader(); - assert(PreheaderBB != NULL && + assert(PreheaderBB != nullptr && "Need to add code to handle loops w/o preheader"); MachineBasicBlock *PredBB = PreheaderBB; InstrMapTy InstrMap; @@ -3352,7 +3354,7 @@ bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI, unsigned BaseReg = MI->getOperand(BasePosLd).getReg(); // Look for the Phi instruction. - MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + MachineRegisterInfo &MRI = MI->getMF()->getRegInfo(); MachineInstr *Phi = MRI.getVRegDef(BaseReg); if (!Phi || !Phi->isPHI()) return false; @@ -3389,9 +3391,8 @@ bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI, /// Apply changes to the instruction if needed. The changes are need /// to improve the scheduling and depend up on the final schedule. 
-MachineInstr *SwingSchedulerDAG::applyInstrChange(MachineInstr *MI, - SMSchedule &Schedule, - bool UpdateDAG) { +void SwingSchedulerDAG::applyInstrChange(MachineInstr *MI, + SMSchedule &Schedule) { SUnit *SU = getSUnit(MI); DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It = InstrChanges.find(SU); @@ -3399,7 +3400,7 @@ MachineInstr *SwingSchedulerDAG::applyInstrChange(MachineInstr *MI, std::pair<unsigned, int64_t> RegAndOffset = It->second; unsigned BasePos, OffsetPos; if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) - return nullptr; + return; unsigned BaseReg = MI->getOperand(BasePos).getReg(); MachineInstr *LoopDef = findDefInLoop(BaseReg); int DefStageNum = Schedule.stageScheduled(getSUnit(LoopDef)); @@ -3417,15 +3418,11 @@ MachineInstr *SwingSchedulerDAG::applyInstrChange(MachineInstr *MI, int64_t NewOffset = MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff; NewMI->getOperand(OffsetPos).setImm(NewOffset); - if (UpdateDAG) { - SU->setInstr(NewMI); - MISUnitMap[NewMI] = SU; - } + SU->setInstr(NewMI); + MISUnitMap[NewMI] = SU; NewMIs.insert(NewMI); - return NewMI; } } - return nullptr; } /// Return true for an order dependence that is loop carried potentially. @@ -3871,6 +3868,58 @@ bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) { return true; } +/// Attempt to fix the degenerate cases when the instruction serialization +/// causes the register lifetimes to overlap. For example, +/// p' = store_pi(p, b) +/// = load p, offset +/// In this case p and p' overlap, which means that two registers are needed. +/// Instead, this function changes the load to use p' and updates the offset. 
+void SwingSchedulerDAG::fixupRegisterOverlaps(std::deque<SUnit *> &Instrs) { + unsigned OverlapReg = 0; + unsigned NewBaseReg = 0; + for (SUnit *SU : Instrs) { + MachineInstr *MI = SU->getInstr(); + for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + // Look for an instruction that uses p. The instruction occurs in the + // same cycle but occurs later in the serialized order. + if (MO.isReg() && MO.isUse() && MO.getReg() == OverlapReg) { + // Check that the instruction appears in the InstrChanges structure, + // which contains instructions that can have the offset updated. + DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It = + InstrChanges.find(SU); + if (It != InstrChanges.end()) { + unsigned BasePos, OffsetPos; + // Update the base register and adjust the offset. + if (TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) { + MachineInstr *NewMI = MF.CloneMachineInstr(MI); + NewMI->getOperand(BasePos).setReg(NewBaseReg); + int64_t NewOffset = + MI->getOperand(OffsetPos).getImm() - It->second.second; + NewMI->getOperand(OffsetPos).setImm(NewOffset); + SU->setInstr(NewMI); + MISUnitMap[NewMI] = SU; + NewMIs.insert(NewMI); + } + } + OverlapReg = 0; + NewBaseReg = 0; + break; + } + // Look for an instruction of the form p' = op(p), which uses and defines + // two virtual registers that get allocated to the same physical register. + unsigned TiedUseIdx = 0; + if (MI->isRegTiedToUseOperand(i, &TiedUseIdx)) { + // OverlapReg is p in the example above. + OverlapReg = MI->getOperand(TiedUseIdx).getReg(); + // NewBaseReg is p' in the example above. + NewBaseReg = MI->getOperand(i).getReg(); + break; + } + } + } +} + /// After the schedule has been formed, call this function to combine /// the instructions from the different stages/cycles. That is, this /// function creates a schedule that represents a single iteration. 
@@ -3931,7 +3980,7 @@ void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) { // map. We need to use the new registers to create the correct order. for (int i = 0, e = SSD->SUnits.size(); i != e; ++i) { SUnit *SU = &SSD->SUnits[i]; - SSD->applyInstrChange(SU->getInstr(), *this, true); + SSD->applyInstrChange(SU->getInstr(), *this); } // Reorder the instructions in each cycle to fix and improve the @@ -3955,11 +4004,13 @@ void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) { // Replace the old order with the new order. cycleInstrs.swap(newOrderZC); cycleInstrs.insert(cycleInstrs.end(), newOrderI.begin(), newOrderI.end()); + SSD->fixupRegisterOverlaps(cycleInstrs); } DEBUG(dump();); } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print the schedule information to the given output. void SMSchedule::print(raw_ostream &os) const { // Iterate over each cycle. @@ -3975,7 +4026,6 @@ void SMSchedule::print(raw_ostream &os) const { } } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Utility function used for debugging to print the schedule. 
LLVM_DUMP_METHOD void SMSchedule::dump() const { print(dbgs()); } #endif diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index be06053f0040..b82ab02a6e6c 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -19,6 +19,9 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" @@ -28,9 +31,6 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <cassert> using namespace llvm; @@ -183,7 +183,7 @@ void MachineRegisterInfo::verifyUseList(unsigned Reg) const { MachineOperand *MO = &M; MachineInstr *MI = MO->getParent(); if (!MI) { - errs() << PrintReg(Reg, getTargetRegisterInfo()) + errs() << printReg(Reg, getTargetRegisterInfo()) << " use list MachineOperand " << MO << " has no parent instruction.\n"; Valid = false; @@ -192,19 +192,19 @@ void MachineRegisterInfo::verifyUseList(unsigned Reg) const { MachineOperand *MO0 = &MI->getOperand(0); unsigned NumOps = MI->getNumOperands(); if (!(MO >= MO0 && MO < MO0+NumOps)) { - errs() << PrintReg(Reg, getTargetRegisterInfo()) + errs() << printReg(Reg, getTargetRegisterInfo()) << " use list MachineOperand " << MO << " doesn't belong to parent MI: " << *MI; Valid = false; } if (!MO->isReg()) { - errs() << PrintReg(Reg, getTargetRegisterInfo()) + errs() << printReg(Reg, getTargetRegisterInfo()) << " MachineOperand " << MO << ": " << *MO << " is not a register\n"; Valid = false; } if (MO->getReg() != Reg) { - errs() << PrintReg(Reg, getTargetRegisterInfo()) + 
errs() << printReg(Reg, getTargetRegisterInfo()) << " use-list MachineOperand " << MO << ": " << *MO << " is the wrong register\n"; Valid = false; @@ -428,8 +428,8 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB, // Emit the copies into the top of the block. for (unsigned i = 0, e = LiveIns.size(); i != e; ++i) if (LiveIns[i].second) { - if (use_empty(LiveIns[i].second)) { - // The livein has no uses. Drop it. + if (use_nodbg_empty(LiveIns[i].second)) { + // The livein has no non-dbg uses. Drop it. // // It would be preferable to have isel avoid creating live-in // records for unused arguments in the first place, but it's @@ -487,6 +487,13 @@ bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg) const { return true; } +bool +MachineRegisterInfo::isCallerPreservedOrConstPhysReg(unsigned PhysReg) const { + const TargetRegisterInfo *TRI = getTargetRegisterInfo(); + return isConstantPhysReg(PhysReg) || + TRI->isCallerPreservedPhysReg(PhysReg, *MF); +} + /// markUsesInDebugValueAsUndef - Mark every DBG_VALUE referencing the /// specified register as undefined which causes the DBG_VALUE to be /// deleted during LiveDebugVariables analysis. @@ -524,7 +531,7 @@ static bool isNoReturnDef(const MachineOperand &MO) { const MachineFunction &MF = *MBB.getParent(); // We need to keep correct unwind information even if the function will // not return, since the runtime may need it. 
- if (MF.getFunction()->hasFnAttribute(Attribute::UWTable)) + if (MF.getFunction().hasFnAttribute(Attribute::UWTable)) return false; const Function *Called = getCalledFunction(MI); return !(Called == nullptr || !Called->hasFnAttribute(Attribute::NoReturn) || diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp index e9b47559309f..36844e9fb30a 100644 --- a/lib/CodeGen/MachineSSAUpdater.cpp +++ b/lib/CodeGen/MachineSSAUpdater.cpp @@ -15,31 +15,36 @@ #include "llvm/CodeGen/MachineSSAUpdater.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Utils/SSAUpdaterImpl.h" +#include <utility> + using namespace llvm; #define DEBUG_TYPE "machine-ssaupdater" -typedef DenseMap<MachineBasicBlock*, unsigned> AvailableValsTy; +using AvailableValsTy = DenseMap<MachineBasicBlock *, unsigned>; + static AvailableValsTy &getAvailableVals(void *AV) { return *static_cast<AvailableValsTy*>(AV); } MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF, SmallVectorImpl<MachineInstr*> *NewPHI) - : AV(nullptr), InsertedPHIs(NewPHI) { - TII = MF.getSubtarget().getInstrInfo(); - MRI = &MF.getRegInfo(); -} + : InsertedPHIs(NewPHI), TII(MF.getSubtarget().getInstrInfo()), + MRI(&MF.getRegInfo()) {} MachineSSAUpdater::~MachineSSAUpdater() { delete 
static_cast<AvailableValsTy*>(AV); @@ -77,7 +82,7 @@ unsigned MachineSSAUpdater::GetValueAtEndOfBlock(MachineBasicBlock *BB) { static unsigned LookForIdenticalPHI(MachineBasicBlock *BB, - SmallVectorImpl<std::pair<MachineBasicBlock*, unsigned> > &PredValues) { + SmallVectorImpl<std::pair<MachineBasicBlock *, unsigned>> &PredValues) { if (BB->empty()) return 0; @@ -136,7 +141,6 @@ MachineInstrBuilder InsertNewDef(unsigned Opcode, /// their respective blocks. However, the use of X happens in the *middle* of /// a block. Because of this, we need to insert a new PHI node in SomeBB to /// merge the appropriate values, and this value isn't live out of the block. -/// unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) { // If there is no definition of the renamed variable in this block, just use // GetValueAtEndOfBlock to do our work. @@ -233,14 +237,15 @@ void MachineSSAUpdater::RewriteUse(MachineOperand &U) { /// SSAUpdaterTraits<MachineSSAUpdater> - Traits for the SSAUpdaterImpl /// template, specialized for MachineSSAUpdater. 
namespace llvm { + template<> class SSAUpdaterTraits<MachineSSAUpdater> { public: - typedef MachineBasicBlock BlkT; - typedef unsigned ValT; - typedef MachineInstr PhiT; + using BlkT = MachineBasicBlock; + using ValT = unsigned; + using PhiT = MachineInstr; + using BlkSucc_iterator = MachineBasicBlock::succ_iterator; - typedef MachineBasicBlock::succ_iterator BlkSucc_iterator; static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return BB->succ_begin(); } static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return BB->succ_end(); } @@ -259,12 +264,16 @@ public: PHI_iterator &operator++() { idx += 2; return *this; } bool operator==(const PHI_iterator& x) const { return idx == x.idx; } bool operator!=(const PHI_iterator& x) const { return !operator==(x); } + unsigned getIncomingValue() { return PHI->getOperand(idx).getReg(); } + MachineBasicBlock *getIncomingBlock() { return PHI->getOperand(idx+1).getMBB(); } }; + static inline PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); } + static inline PHI_iterator PHI_end(PhiT *PHI) { return PHI_iterator(PHI, true); } @@ -309,7 +318,6 @@ public: } /// InstrIsPHI - Check if an instruction is a PHI. - /// static MachineInstr *InstrIsPHI(MachineInstr *I) { if (I && I->isPHI()) return I; @@ -338,7 +346,7 @@ public: } }; -} // End llvm namespace +} // end namespace llvm /// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry /// for the specified BB and if so, return it. 
If not, construct SSA form by diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index eaba9a58557c..e15eb658a05c 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -22,7 +22,7 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" @@ -42,8 +42,12 @@ #include "llvm/CodeGen/ScheduleDFS.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" @@ -52,10 +56,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -98,7 +98,7 @@ static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden, static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden, cl::desc("Only schedule this function")); static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden, - cl::desc("Only schedule this MBB#")); + cl::desc("Only schedule this MBB#")); #else static bool ViewMISchedDAGs = false; #endif // NDEBUG @@ -200,8 +200,7 @@ INITIALIZE_PASS_DEPENDENCY(LiveIntervals) INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE, "Machine 
Instruction Scheduler", false, false) -MachineScheduler::MachineScheduler() -: MachineSchedulerBase(ID) { +MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) { initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); } @@ -225,8 +224,7 @@ char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID; INITIALIZE_PASS(PostMachineScheduler, "postmisched", "PostRA Machine Instruction Scheduler", false, false) -PostMachineScheduler::PostMachineScheduler() -: MachineSchedulerBase(ID) { +PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) { initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry()); } @@ -353,7 +351,7 @@ ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() { /// design would be to split blocks at scheduling boundaries, but LLVM has a /// general bias against block splitting purely for implementation simplicity. bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { - if (skipFunction(*mf.getFunction())) + if (skipFunction(mf.getFunction())) return false; if (EnableMachineSched.getNumOccurrences()) { @@ -391,7 +389,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { } bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) { - if (skipFunction(*mf.getFunction())) + if (skipFunction(mf.getFunction())) return false; if (EnablePostRAMachineSched.getNumOccurrences()) { @@ -405,6 +403,7 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) { // Initialize the context of the pass. MF = &mf; + MLI = &getAnalysis<MachineLoopInfo>(); PassConfig = &getAnalysis<TargetPassConfig>(); if (VerifyScheduling) @@ -437,11 +436,67 @@ static bool isSchedBoundary(MachineBasicBlock::iterator MI, return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF); } +/// A region of an MBB for scheduling. 
+namespace { +struct SchedRegion { + /// RegionBegin is the first instruction in the scheduling region, and + /// RegionEnd is either MBB->end() or the scheduling boundary after the + /// last instruction in the scheduling region. These iterators cannot refer + /// to instructions outside of the identified scheduling region because + /// those may be reordered before scheduling this region. + MachineBasicBlock::iterator RegionBegin; + MachineBasicBlock::iterator RegionEnd; + unsigned NumRegionInstrs; + + SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E, + unsigned N) : + RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {} +}; +} // end anonymous namespace + +using MBBRegionsVector = SmallVector<SchedRegion, 16>; + +static void +getSchedRegions(MachineBasicBlock *MBB, + MBBRegionsVector &Regions, + bool RegionsTopDown) { + MachineFunction *MF = MBB->getParent(); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + + MachineBasicBlock::iterator I = nullptr; + for(MachineBasicBlock::iterator RegionEnd = MBB->end(); + RegionEnd != MBB->begin(); RegionEnd = I) { + + // Avoid decrementing RegionEnd for blocks with no terminator. + if (RegionEnd != MBB->end() || + isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) { + --RegionEnd; + } + + // The next region starts above the previous region. Look backward in the + // instruction stream until we find the nearest boundary. + unsigned NumRegionInstrs = 0; + I = RegionEnd; + for (;I != MBB->begin(); --I) { + MachineInstr &MI = *std::prev(I); + if (isSchedBoundary(&MI, &*MBB, MF, TII)) + break; + if (!MI.isDebugValue()) + // MBB::size() uses instr_iterator to count. Here we need a bundle to + // count as a single instruction. + ++NumRegionInstrs; + } + + Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs)); + } + + if (RegionsTopDown) + std::reverse(Regions.begin(), Regions.end()); +} + /// Main driver for both MachineScheduler and PostMachineScheduler. 
void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags) { - const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); - // Visit all machine basic blocks. // // TODO: Visit blocks in global postorder or postorder within the bottom-up @@ -459,39 +514,28 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler, continue; #endif - // Break the block into scheduling regions [I, RegionEnd), and schedule each - // region as soon as it is discovered. RegionEnd points the scheduling - // boundary at the bottom of the region. The DAG does not include RegionEnd, - // but the region does (i.e. the next RegionEnd is above the previous - // RegionBegin). If the current block has no terminator then RegionEnd == - // MBB->end() for the bottom region. + // Break the block into scheduling regions [I, RegionEnd). RegionEnd + // points to the scheduling boundary at the bottom of the region. The DAG + // does not include RegionEnd, but the region does (i.e. the next + // RegionEnd is above the previous RegionBegin). If the current block has + // no terminator then RegionEnd == MBB->end() for the bottom region. + // + // All the regions of MBB are first found and stored in MBBRegions, which + // will be processed (MBB) top-down if initialized with true. // // The Scheduler may insert instructions during either schedule() or // exitRegion(), even for empty regions. So the local iterators 'I' and - // 'RegionEnd' are invalid across these calls. - // - // MBB::size() uses instr_iterator to count. Here we need a bundle to count - // as a single instruction. - for(MachineBasicBlock::iterator RegionEnd = MBB->end(); - RegionEnd != MBB->begin(); RegionEnd = Scheduler.begin()) { - - // Avoid decrementing RegionEnd for blocks with no terminator. - if (RegionEnd != MBB->end() || - isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) { - --RegionEnd; - } + // 'RegionEnd' are invalid across these calls. 
Instructions must not be + // added to other regions than the current one without updating MBBRegions. + + MBBRegionsVector MBBRegions; + getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown()); + for (MBBRegionsVector::iterator R = MBBRegions.begin(); + R != MBBRegions.end(); ++R) { + MachineBasicBlock::iterator I = R->RegionBegin; + MachineBasicBlock::iterator RegionEnd = R->RegionEnd; + unsigned NumRegionInstrs = R->NumRegionInstrs; - // The next region starts above the previous region. Look backward in the - // instruction stream until we find the nearest boundary. - unsigned NumRegionInstrs = 0; - MachineBasicBlock::iterator I = RegionEnd; - for (; I != MBB->begin(); --I) { - MachineInstr &MI = *std::prev(I); - if (isSchedBoundary(&MI, &*MBB, MF, TII)) - break; - if (!MI.isDebugValue()) - ++NumRegionInstrs; - } // Notify the scheduler of the region, even if we may skip scheduling // it. Perhaps it still needs to be bundled. Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs); @@ -504,28 +548,23 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler, continue; } DEBUG(dbgs() << "********** MI Scheduling **********\n"); - DEBUG(dbgs() << MF->getName() - << ":BB#" << MBB->getNumber() << " " << MBB->getName() - << "\n From: " << *I << " To: "; + DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB) << " " + << MBB->getName() << "\n From: " << *I << " To: "; if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; else dbgs() << "End"; dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n'); if (DumpCriticalPathLength) { errs() << MF->getName(); - errs() << ":BB# " << MBB->getNumber(); + errs() << ":%bb. " << MBB->getNumber(); errs() << " " << MBB->getName() << " \n"; } // Schedule a region: possibly reorder instructions. - // This invalidates 'RegionEnd' and 'I'. + // This invalidates the original region iterators. Scheduler.schedule(); // Close the current region. 
Scheduler.exitRegion(); - - // Scheduling has invalidated the current iterator 'I'. Ask the - // scheduler for the top of it's scheduled region. - RegionEnd = Scheduler.begin(); } Scheduler.finishBlock(); // FIXME: Ideally, no further passes should rely on kill flags. However, @@ -650,6 +689,16 @@ void ScheduleDAGMI::releasePredecessors(SUnit *SU) { releasePred(SU, &Pred); } +void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) { + ScheduleDAGInstrs::startBlock(bb); + SchedImpl->enterMBB(bb); +} + +void ScheduleDAGMI::finishBlock() { + SchedImpl->leaveMBB(); + ScheduleDAGInstrs::finishBlock(); +} + /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after /// crossing a scheduling boundary. [begin, end) includes all instructions in /// the region, including the boundary itself and single-instruction regions @@ -773,11 +822,11 @@ void ScheduleDAGMI::schedule() { placeDebugValues(); DEBUG({ - unsigned BBNum = begin()->getParent()->getNumber(); - dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n"; - dumpSchedule(); - dbgs() << '\n'; - }); + dbgs() << "*** Final schedule for " + << printMBBReference(*begin()->getParent()) << " ***\n"; + dumpSchedule(); + dbgs() << '\n'; + }); } /// Apply each ScheduleDAGMutation step in order. @@ -1004,7 +1053,10 @@ void ScheduleDAGMILive::initRegPressure() { dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI); ); - assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom"); + assert((BotRPTracker.getPos() == RegionEnd || + (RegionEnd->isDebugValue() && + BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) && + "Can't find the region bottom"); // Cache the list of excess pressure sets in this region. This will also track // the max pressure in the scheduled code for these sets. 
@@ -1080,7 +1132,7 @@ void ScheduleDAGMILive::updatePressureDiffs( PDiff.addPressureChange(Reg, Decrement, &MRI); DEBUG( dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") " - << PrintReg(Reg, TRI) << ':' << PrintLaneMask(P.LaneMask) + << printReg(Reg, TRI) << ':' << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr(); dbgs() << " to "; PDiff.dump(*TRI); @@ -1088,7 +1140,7 @@ void ScheduleDAGMILive::updatePressureDiffs( } } else { assert(P.LaneMask.any()); - DEBUG(dbgs() << " LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n"); + DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n"); // This may be called before CurrentBottom has been initialized. However, // BotRPTracker must have a valid position. We want the value live into the // instruction or live out of the block, so ask for the previous @@ -1211,11 +1263,11 @@ void ScheduleDAGMILive::schedule() { placeDebugValues(); DEBUG({ - unsigned BBNum = begin()->getParent()->getNumber(); - dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n"; - dumpSchedule(); - dbgs() << '\n'; - }); + dbgs() << "*** Final schedule for " + << printMBBReference(*begin()->getParent()) << " ***\n"; + dumpSchedule(); + dbgs() << '\n'; + }); } /// Build the DAG and setup three register pressure trackers. 
@@ -1410,7 +1462,8 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) { RegOpers.detectDeadDefs(*MI, *LIS); } - BotRPTracker.recedeSkipDebugValues(); + if (BotRPTracker.getPos() != CurrentBottom) + BotRPTracker.recedeSkipDebugValues(); SmallVector<RegisterMaskPair, 8> LiveUses; BotRPTracker.recede(RegOpers, &LiveUses); assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); @@ -1511,14 +1564,10 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps( std::sort(MemOpRecords.begin(), MemOpRecords.end()); unsigned ClusterLength = 1; for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) { - if (MemOpRecords[Idx].BaseReg != MemOpRecords[Idx+1].BaseReg) { - ClusterLength = 1; - continue; - } - SUnit *SUa = MemOpRecords[Idx].SU; SUnit *SUb = MemOpRecords[Idx+1].SU; - if (TII->shouldClusterMemOps(*SUa->getInstr(), *SUb->getInstr(), + if (TII->shouldClusterMemOps(*SUa->getInstr(), MemOpRecords[Idx].BaseReg, + *SUb->getInstr(), MemOpRecords[Idx+1].BaseReg, ClusterLength) && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) { DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU(" @@ -1541,7 +1590,6 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps( /// \brief Callback from DAG postProcessing to create cluster edges for loads. void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) { - ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs); // Map DAG NodeNum to store chain ID. @@ -1587,6 +1635,7 @@ namespace { class CopyConstrain : public ScheduleDAGMutation { // Transient state. SlotIndex RegionBeginIdx; + // RegionEndIdx is the slot index of the last non-debug instruction in the // scheduling region. So we may have RegionBeginIdx == RegionEndIdx. 
SlotIndex RegionEndIdx; @@ -1785,6 +1834,13 @@ static const unsigned InvalidCycle = ~0U; SchedBoundary::~SchedBoundary() { delete HazardRec; } +/// Given a Count of resource usage and a Latency value, return true if a +/// SchedBoundary becomes resource limited. +static bool checkResourceLimit(unsigned LFactor, unsigned Count, + unsigned Latency) { + return (int)(Count - (Latency * LFactor)) > (int)LFactor; +} + void SchedBoundary::reset() { // A new HazardRec is created for each DAG and owned by SchedBoundary. // Destroying and reconstructing it is very expensive though. So keep @@ -1916,16 +1972,18 @@ bool SchedBoundary::checkHazard(SUnit *SU) { if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) { const MCSchedClassDesc *SC = DAG->getSchedClass(SU); - for (TargetSchedModel::ProcResIter - PI = SchedModel->getWriteProcResBegin(SC), - PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { - unsigned NRCycle = getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles); + for (const MCWriteProcResEntry &PE : + make_range(SchedModel->getWriteProcResBegin(SC), + SchedModel->getWriteProcResEnd(SC))) { + unsigned ResIdx = PE.ProcResourceIdx; + unsigned Cycles = PE.Cycles; + unsigned NRCycle = getNextResourceCycle(ResIdx, Cycles); if (NRCycle > CurrCycle) { #ifndef NDEBUG - MaxObservedStall = std::max(PI->Cycles, MaxObservedStall); + MaxObservedStall = std::max(Cycles, MaxObservedStall); #endif DEBUG(dbgs() << " SU(" << SU->NodeNum << ") " - << SchedModel->getResourceName(PI->ProcResourceIdx) + << SchedModel->getResourceName(ResIdx) << "=" << NRCycle << "c\n"); return true; } @@ -2037,10 +2095,9 @@ void SchedBoundary::bumpCycle(unsigned NextCycle) { } } CheckPending = true; - unsigned LFactor = SchedModel->getLatencyFactor(); IsResourceLimited = - (int)(getCriticalCount() - (getScheduledLatency() * LFactor)) - > (int)LFactor; + checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(), + getScheduledLatency()); DEBUG(dbgs() << "Cycle: " << 
CurrCycle << ' ' << Available.getName() << '\n'); } @@ -2193,16 +2250,15 @@ void SchedBoundary::bumpNode(SUnit *SU) { << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n"); } // If we stall for any reason, bump the cycle. - if (NextCycle > CurrCycle) { + if (NextCycle > CurrCycle) bumpCycle(NextCycle); - } else { + else // After updating ZoneCritResIdx and ExpectedLatency, check if we're // resource limited. If a stall occurred, bumpCycle does this. - unsigned LFactor = SchedModel->getLatencyFactor(); IsResourceLimited = - (int)(getCriticalCount() - (getScheduledLatency() * LFactor)) - > (int)LFactor; - } + checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(), + getScheduledLatency()); + // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle // resets CurrMOps. Loop to handle instructions with more MOps than issue in // one cycle. Since we commonly reach the max MOps here, opportunistically @@ -2317,7 +2373,7 @@ LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const { ResCount = getResourceCount(ZoneCritResIdx); } else { ResFactor = SchedModel->getMicroOpFactor(); - ResCount = RetiredMOps * SchedModel->getMicroOpFactor(); + ResCount = RetiredMOps * ResFactor; } unsigned LFactor = SchedModel->getLatencyFactor(); dbgs() << Available.getName() << " @" << CurrCycle << "c\n" @@ -2387,10 +2443,10 @@ void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA, OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0; bool OtherResLimited = false; - if (SchedModel->hasInstrSchedModel()) { - unsigned LFactor = SchedModel->getLatencyFactor(); - OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor; - } + if (SchedModel->hasInstrSchedModel()) + OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(), + OtherCount, RemLatency); + // Schedule aggressively for latency in PostRA mode. 
We don't check for // acyclic latency during PostRA, and highly out-of-order processors will // skip PostRA scheduling. @@ -2605,7 +2661,7 @@ void GenericScheduler::initialize(ScheduleDAGMI *dag) { void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) { - const MachineFunction &MF = *Begin->getParent()->getParent(); + const MachineFunction &MF = *Begin->getMF(); const TargetLowering *TLI = MF.getSubtarget().getTargetLowering(); // Avoid setting up the register pressure tracker for small regions to save @@ -3199,7 +3255,6 @@ void PostGenericScheduler::registerRoots() { /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. void PostGenericScheduler::tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) { - // Initialize the candidate if needed. if (!Cand.isValid()) { TryCand.Reason = NodeOrder; @@ -3438,6 +3493,7 @@ class InstructionShuffler : public MachineSchedStrategy { // instructions to be scheduled first. PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>> TopQ; + // When scheduling bottom-up, use greater-than as the queue priority. PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>> BottomQ; @@ -3554,6 +3610,7 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits { SS << " I:" << DFS->getNumInstrs(SU); return SS.str(); } + static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) { return G->getGraphNodeLabel(SU); } @@ -3577,7 +3634,6 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits { /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG /// rendered using 'dot'. 
-/// void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) { #ifndef NDEBUG ViewGraph(this, Name, false, Title); diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index 79e3fea3f90c..bedfdd84b1ca 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -1,4 +1,4 @@ -//===-- MachineSink.cpp - Sinking for machine instructions ----------------===// +//===- MachineSink.cpp - Sinking for machine instructions -----------------===// // // The LLVM Compiler Infrastructure // @@ -18,6 +18,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -32,14 +33,17 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/Pass.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -93,12 +97,12 @@ namespace { // Remember which edges we are about to split. // This is different from CEBCandidates since those edges // will be split. 
- SetVector<std::pair<MachineBasicBlock*, MachineBasicBlock*> > ToSplit; + SetVector<std::pair<MachineBasicBlock *, MachineBasicBlock *>> ToSplit; SparseBitVector<> RegsToClearKillFlags; - typedef std::map<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>> - AllSuccsCache; + using AllSuccsCache = + std::map<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>; public: static char ID; // Pass identification @@ -133,6 +137,7 @@ namespace { bool isWorthBreakingCriticalEdge(MachineInstr &MI, MachineBasicBlock *From, MachineBasicBlock *To); + /// \brief Postpone the splitting of the given critical /// edge (\p From, \p To). /// @@ -150,6 +155,7 @@ namespace { MachineBasicBlock *To, bool BreakPHIEdge); bool SinkInstruction(MachineInstr &MI, bool &SawStore, + AllSuccsCache &AllSuccessors); bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB, MachineBasicBlock *DefMBB, @@ -172,7 +178,9 @@ namespace { } // end anonymous namespace char MachineSinking::ID = 0; + char &llvm::MachineSinkingID = MachineSinking::ID; + INITIALIZE_PASS_BEGIN(MachineSinking, DEBUG_TYPE, "Machine code sinking", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) @@ -236,17 +244,17 @@ MachineSinking::AllUsesDominatedByBlock(unsigned Reg, // into and they are all PHI nodes. In this case, machine-sink must break // the critical edge first. e.g. // - // BB#1: derived from LLVM BB %bb4.preheader - // Predecessors according to CFG: BB#0 + // %bb.1: derived from LLVM BB %bb4.preheader + // Predecessors according to CFG: %bb.0 // ... - // %reg16385<def> = DEC64_32r %reg16437, %EFLAGS<imp-def,dead> + // %reg16385 = DEC64_32r %reg16437, implicit-def dead %eflags // ... 
- // JE_4 <BB#37>, %EFLAGS<imp-use> - // Successors according to CFG: BB#37 BB#2 + // JE_4 <%bb.37>, implicit %eflags + // Successors according to CFG: %bb.37 %bb.2 // - // BB#2: derived from LLVM BB %bb.nph - // Predecessors according to CFG: BB#0 BB#1 - // %reg16386<def> = PHI %reg16434, <BB#0>, %reg16385, <BB#1> + // %bb.2: derived from LLVM BB %bb.nph + // Predecessors according to CFG: %bb.0 %bb.1 + // %reg16386 = PHI %reg16434, %bb.0, %reg16385, %bb.1 BreakPHIEdge = true; for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { MachineInstr *UseInst = MO.getParent(); @@ -284,7 +292,7 @@ MachineSinking::AllUsesDominatedByBlock(unsigned Reg, } bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; DEBUG(dbgs() << "******** Machine Sinking ********\n"); @@ -314,10 +322,10 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { for (auto &Pair : ToSplit) { auto NewSucc = Pair.first->SplitCriticalEdge(Pair.second, *this); if (NewSucc != nullptr) { - DEBUG(dbgs() << " *** Splitting critical edge:" - " BB#" << Pair.first->getNumber() - << " -- BB#" << NewSucc->getNumber() - << " -- BB#" << Pair.second->getNumber() << '\n'); + DEBUG(dbgs() << " *** Splitting critical edge: " + << printMBBReference(*Pair.first) << " -- " + << printMBBReference(*NewSucc) << " -- " + << printMBBReference(*Pair.second) << '\n'); MadeChange = true; ++NumSplit; } else @@ -453,33 +461,33 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI, // It's not always legal to break critical edges and sink the computation // to the edge. // - // BB#1: + // %bb.1: // v1024 - // Beq BB#3 + // Beq %bb.3 // <fallthrough> - // BB#2: + // %bb.2: // ... no uses of v1024 // <fallthrough> - // BB#3: + // %bb.3: // ... 
// = v1024 // - // If BB#1 -> BB#3 edge is broken and computation of v1024 is inserted: + // If %bb.1 -> %bb.3 edge is broken and computation of v1024 is inserted: // - // BB#1: + // %bb.1: // ... - // Bne BB#2 - // BB#4: + // Bne %bb.2 + // %bb.4: // v1024 = - // B BB#3 - // BB#2: + // B %bb.3 + // %bb.2: // ... no uses of v1024 // <fallthrough> - // BB#3: + // %bb.3: // ... // = v1024 // - // This is incorrect since v1024 is not computed along the BB#1->BB#2->BB#3 + // This is incorrect since v1024 is not computed along the %bb.1->%bb.2->%bb.3 // flow. We need to ensure the new basic block where the computation is // sunk to dominates all the uses. // It's only legal to break critical edge and sink the computation to the @@ -570,7 +578,6 @@ bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr &MI, SmallVector<MachineBasicBlock *, 4> & MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB, AllSuccsCache &AllSuccessors) const { - // Do we have the sorted successors in cache ? auto Succs = AllSuccessors.find(MBB); if (Succs != AllSuccessors.end()) @@ -711,7 +718,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) { - typedef TargetInstrInfo::MachineBranchPredicate MachineBranchPredicate; + using MachineBranchPredicate = TargetInstrInfo::MachineBranchPredicate; auto *MBB = MI.getParent(); if (MBB->pred_size() != 1) @@ -784,7 +791,6 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, if (!SuccToSinkTo) return false; - // If the instruction to move defines a dead physical register which is live // when leaving the basic block, don't move it because it could turn into a // "zombie" define of that preg. E.g., EFLAGS. 
(<rdar://problem/8030636>) @@ -863,11 +869,20 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, SmallVector<MachineInstr *, 2> DbgValuesToSink; collectDebugValues(MI, DbgValuesToSink); + // Merge or erase debug location to ensure consistent stepping in profilers + // and debuggers. + if (!SuccToSinkTo->empty() && InsertPos != SuccToSinkTo->end()) + MI.setDebugLoc(DILocation::getMergedLocation(MI.getDebugLoc(), + InsertPos->getDebugLoc())); + else + MI.setDebugLoc(DebugLoc()); + + // Move the instruction. SuccToSinkTo->splice(InsertPos, ParentBlock, MI, ++MachineBasicBlock::iterator(MI)); - // Move debug values. + // Move previously adjacent debug value instructions to the insert position. for (SmallVectorImpl<MachineInstr *>::iterator DBI = DbgValuesToSink.begin(), DBE = DbgValuesToSink.end(); DBI != DBE; ++DBI) { MachineInstr *DbgMI = *DBI; diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp index 6c5abc66fba1..d81c6f8a31e1 100644 --- a/lib/CodeGen/MachineTraceMetrics.cpp +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -22,15 +22,15 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cassert> #include <iterator> @@ -42,6 +42,7 @@ using namespace llvm; #define DEBUG_TYPE "machine-trace-metrics" char MachineTraceMetrics::ID = 0; + char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID; INITIALIZE_PASS_BEGIN(MachineTraceMetrics, 
DEBUG_TYPE, @@ -395,7 +396,8 @@ MachineTraceMetrics::getEnsemble(MachineTraceMetrics::Strategy strategy) { } void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) { - DEBUG(dbgs() << "Invalidate traces through BB#" << MBB->getNumber() << '\n'); + DEBUG(dbgs() << "Invalidate traces through " << printMBBReference(*MBB) + << '\n'); BlockInfo[MBB->getNumber()].invalidate(); for (unsigned i = 0; i != TS_NumStrategies; ++i) if (Ensembles[i]) @@ -475,8 +477,8 @@ public: /// Compute the trace through MBB. void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { - DEBUG(dbgs() << "Computing " << getName() << " trace through BB#" - << MBB->getNumber() << '\n'); + DEBUG(dbgs() << "Computing " << getName() << " trace through " + << printMBBReference(*MBB) << '\n'); // Set up loop bounds for the backwards post-order traversal. LoopBounds Bounds(BlockInfo, MTM.Loops); @@ -484,13 +486,13 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { Bounds.Downward = false; Bounds.Visited.clear(); for (auto I : inverse_post_order_ext(MBB, Bounds)) { - DEBUG(dbgs() << " pred for BB#" << I->getNumber() << ": "); + DEBUG(dbgs() << " pred for " << printMBBReference(*I) << ": "); TraceBlockInfo &TBI = BlockInfo[I->getNumber()]; // All the predecessors have been visited, pick the preferred one. TBI.Pred = pickTracePred(I); DEBUG({ if (TBI.Pred) - dbgs() << "BB#" << TBI.Pred->getNumber() << '\n'; + dbgs() << printMBBReference(*TBI.Pred) << '\n'; else dbgs() << "null\n"; }); @@ -502,13 +504,13 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { Bounds.Downward = true; Bounds.Visited.clear(); for (auto I : post_order_ext(MBB, Bounds)) { - DEBUG(dbgs() << " succ for BB#" << I->getNumber() << ": "); + DEBUG(dbgs() << " succ for " << printMBBReference(*I) << ": "); TraceBlockInfo &TBI = BlockInfo[I->getNumber()]; // All the successors have been visited, pick the preferred one. 
TBI.Succ = pickTraceSucc(I); DEBUG({ if (TBI.Succ) - dbgs() << "BB#" << TBI.Succ->getNumber() << '\n'; + dbgs() << printMBBReference(*TBI.Succ) << '\n'; else dbgs() << "null\n"; }); @@ -529,8 +531,8 @@ MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) { WorkList.push_back(BadMBB); do { const MachineBasicBlock *MBB = WorkList.pop_back_val(); - DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName() - << " height.\n"); + DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' ' + << getName() << " height.\n"); // Find any MBB predecessors that have MBB as their preferred successor. // They are the only ones that need to be invalidated. for (const MachineBasicBlock *Pred : MBB->predecessors()) { @@ -554,8 +556,8 @@ MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) { WorkList.push_back(BadMBB); do { const MachineBasicBlock *MBB = WorkList.pop_back_val(); - DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName() - << " depth.\n"); + DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' ' + << getName() << " depth.\n"); // Find any MBB successors that have MBB as their preferred predecessor. // They are the only ones that need to be invalidated. for (const MachineBasicBlock *Succ : MBB->successors()) { @@ -694,25 +696,6 @@ static void getPHIDeps(const MachineInstr &UseMI, } } -// Keep track of physreg data dependencies by recording each live register unit. -// Associate each regunit with an instruction operand. Depending on the -// direction instructions are scanned, it could be the operand that defined the -// regunit, or the highest operand to read the regunit. 
-namespace { - -struct LiveRegUnit { - unsigned RegUnit; - unsigned Cycle = 0; - const MachineInstr *MI = nullptr; - unsigned Op = 0; - - unsigned getSparseSetIndex() const { return RegUnit; } - - LiveRegUnit(unsigned RU) : RegUnit(RU) {} -}; - -} // end anonymous namespace - // Identify physreg dependencies for UseMI, and update the live regunit // tracking set when scanning instructions downwards. static void updatePhysDepsDownwards(const MachineInstr *UseMI, @@ -797,6 +780,59 @@ computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) { return MaxLen; } +void MachineTraceMetrics::Ensemble:: +updateDepth(MachineTraceMetrics::TraceBlockInfo &TBI, const MachineInstr &UseMI, + SparseSet<LiveRegUnit> &RegUnits) { + SmallVector<DataDep, 8> Deps; + // Collect all data dependencies. + if (UseMI.isPHI()) + getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI); + else if (getDataDeps(UseMI, Deps, MTM.MRI)) + updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI); + + // Filter and process dependencies, computing the earliest issue cycle. + unsigned Cycle = 0; + for (const DataDep &Dep : Deps) { + const TraceBlockInfo&DepTBI = + BlockInfo[Dep.DefMI->getParent()->getNumber()]; + // Ignore dependencies from outside the current trace. + if (!DepTBI.isUsefulDominator(TBI)) + continue; + assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency"); + unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth; + // Add latency if DefMI is a real instruction. Transients get latency 0. + if (!Dep.DefMI->isTransient()) + DepCycle += MTM.SchedModel + .computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, Dep.UseOp); + Cycle = std::max(Cycle, DepCycle); + } + // Remember the instruction depth. + InstrCycles &MICycles = Cycles[&UseMI]; + MICycles.Depth = Cycle; + + if (TBI.HasValidInstrHeights) { + // Update critical path length. 
+ TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height); + DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI); + } else { + DEBUG(dbgs() << Cycle << '\t' << UseMI); + } +} + +void MachineTraceMetrics::Ensemble:: +updateDepth(const MachineBasicBlock *MBB, const MachineInstr &UseMI, + SparseSet<LiveRegUnit> &RegUnits) { + updateDepth(BlockInfo[MBB->getNumber()], UseMI, RegUnits); +} + +void MachineTraceMetrics::Ensemble:: +updateDepths(MachineBasicBlock::iterator Start, + MachineBasicBlock::iterator End, + SparseSet<LiveRegUnit> &RegUnits) { + for (; Start != End; Start++) + updateDepth(Start->getParent(), *Start, RegUnits); +} + /// Compute instruction depths for all instructions above or in MBB in its /// trace. This assumes that the trace through MBB has already been computed. void MachineTraceMetrics::Ensemble:: @@ -822,10 +858,9 @@ computeInstrDepths(const MachineBasicBlock *MBB) { RegUnits.setUniverse(MTM.TRI->getNumRegUnits()); // Go through trace blocks in top-down order, stopping after the center block. - SmallVector<DataDep, 8> Deps; while (!Stack.empty()) { MBB = Stack.pop_back_val(); - DEBUG(dbgs() << "\nDepths for BB#" << MBB->getNumber() << ":\n"); + DEBUG(dbgs() << "\nDepths for " << printMBBReference(*MBB) << ":\n"); TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; TBI.HasValidInstrDepths = true; TBI.CriticalPath = 0; @@ -848,40 +883,7 @@ computeInstrDepths(const MachineBasicBlock *MBB) { TBI.CriticalPath = computeCrossBlockCriticalPath(TBI); for (const auto &UseMI : *MBB) { - // Collect all data dependencies. - Deps.clear(); - if (UseMI.isPHI()) - getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI); - else if (getDataDeps(UseMI, Deps, MTM.MRI)) - updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI); - - // Filter and process dependencies, computing the earliest issue cycle. 
- unsigned Cycle = 0; - for (const DataDep &Dep : Deps) { - const TraceBlockInfo&DepTBI = - BlockInfo[Dep.DefMI->getParent()->getNumber()]; - // Ignore dependencies from outside the current trace. - if (!DepTBI.isUsefulDominator(TBI)) - continue; - assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency"); - unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth; - // Add latency if DefMI is a real instruction. Transients get latency 0. - if (!Dep.DefMI->isTransient()) - DepCycle += MTM.SchedModel - .computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, Dep.UseOp); - Cycle = std::max(Cycle, DepCycle); - } - // Remember the instruction depth. - InstrCycles &MICycles = Cycles[&UseMI]; - MICycles.Depth = Cycle; - - if (!TBI.HasValidInstrHeights) { - DEBUG(dbgs() << Cycle << '\t' << UseMI); - continue; - } - // Update critical path length. - TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height); - DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI); + updateDepth(TBI, UseMI, RegUnits); } } } @@ -945,7 +947,7 @@ static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height, return Height; } -typedef DenseMap<const MachineInstr *, unsigned> MIHeightMap; +using MIHeightMap = DenseMap<const MachineInstr *, unsigned>; // Push the height of DefMI upwards if required to match UseMI. // Return true if this is the first time DefMI was seen. @@ -1043,7 +1045,7 @@ computeInstrHeights(const MachineBasicBlock *MBB) { SmallVector<DataDep, 8> Deps; for (;!Stack.empty(); Stack.pop_back()) { MBB = Stack.back(); - DEBUG(dbgs() << "Heights for BB#" << MBB->getNumber() << ":\n"); + DEBUG(dbgs() << "Heights for " << printMBBReference(*MBB) << ":\n"); TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; TBI.HasValidInstrHeights = true; TBI.CriticalPath = 0; @@ -1130,18 +1132,18 @@ computeInstrHeights(const MachineBasicBlock *MBB) { // Update virtual live-in heights. 
They were added by addLiveIns() with a 0 // height because the final height isn't known until now. - DEBUG(dbgs() << "BB#" << MBB->getNumber() << " Live-ins:"); + DEBUG(dbgs() << printMBBReference(*MBB) << " Live-ins:"); for (LiveInReg &LIR : TBI.LiveIns) { const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg); LIR.Height = Heights.lookup(DefMI); - DEBUG(dbgs() << ' ' << PrintReg(LIR.Reg) << '@' << LIR.Height); + DEBUG(dbgs() << ' ' << printReg(LIR.Reg) << '@' << LIR.Height); } // Transfer the live regunits to the live-in list. for (SparseSet<LiveRegUnit>::const_iterator RI = RegUnits.begin(), RE = RegUnits.end(); RI != RE; ++RI) { TBI.LiveIns.push_back(LiveInReg(RI->RegUnit, RI->Cycle)); - DEBUG(dbgs() << ' ' << PrintRegUnit(RI->RegUnit, MTM.TRI) + DEBUG(dbgs() << ' ' << printRegUnit(RI->RegUnit, MTM.TRI) << '@' << RI->Cycle); } DEBUG(dbgs() << '\n'); @@ -1288,7 +1290,7 @@ bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr &DefMI, void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const { OS << getName() << " ensemble:\n"; for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) { - OS << " BB#" << i << '\t'; + OS << " %bb." << i << '\t'; BlockInfo[i].print(OS); OS << '\n'; } @@ -1298,10 +1300,10 @@ void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const { if (hasValidDepth()) { OS << "depth=" << InstrDepth; if (Pred) - OS << " pred=BB#" << Pred->getNumber(); + OS << " pred=" << printMBBReference(*Pred); else OS << " pred=null"; - OS << " head=BB#" << Head; + OS << " head=%bb." << Head; if (HasValidInstrDepths) OS << " +instrs"; } else @@ -1310,10 +1312,10 @@ void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const { if (hasValidHeight()) { OS << "height=" << InstrHeight; if (Succ) - OS << " succ=BB#" << Succ->getNumber(); + OS << " succ=" << printMBBReference(*Succ); else OS << " succ=null"; - OS << " tail=BB#" << Tail; + OS << " tail=%bb." 
<< Tail; if (HasValidInstrHeights) OS << " +instrs"; } else @@ -1325,18 +1327,18 @@ void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const { void MachineTraceMetrics::Trace::print(raw_ostream &OS) const { unsigned MBBNum = &TBI - &TE.BlockInfo[0]; - OS << TE.getName() << " trace BB#" << TBI.Head << " --> BB#" << MBBNum - << " --> BB#" << TBI.Tail << ':'; + OS << TE.getName() << " trace %bb." << TBI.Head << " --> %bb." << MBBNum + << " --> %bb." << TBI.Tail << ':'; if (TBI.hasValidHeight() && TBI.hasValidDepth()) OS << ' ' << getInstrCount() << " instrs."; if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights) OS << ' ' << TBI.CriticalPath << " cycles."; const MachineTraceMetrics::TraceBlockInfo *Block = &TBI; - OS << "\nBB#" << MBBNum; + OS << "\n%bb." << MBBNum; while (Block->hasValidDepth() && Block->Pred) { unsigned Num = Block->Pred->getNumber(); - OS << " <- BB#" << Num; + OS << " <- " << printMBBReference(*Block->Pred); Block = &TE.BlockInfo[Num]; } @@ -1344,7 +1346,7 @@ void MachineTraceMetrics::Trace::print(raw_ostream &OS) const { OS << "\n "; while (Block->hasValidHeight() && Block->Succ) { unsigned Num = Block->Succ->getNumber(); - OS << " -> BB#" << Num; + OS << " -> " << printMBBReference(*Block->Succ); Block = &TE.BlockInfo[Num]; } OS << '\n'; diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index c50a95a06505..c9fe7681e280 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -1,4 +1,4 @@ -//===-- MachineVerifier.cpp - Machine Code Verifier -----------------------===// +//===- MachineVerifier.cpp - Machine Code Verifier ------------------------===// // // The LLVM Compiler Infrastructure // @@ -23,41 +23,68 @@ // the verifier errors. 
//===----------------------------------------------------------------------===// +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" #include "llvm/Analysis/EHPersonalities.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/GlobalISel/RegisterBank.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" +#include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/Support/Debug.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FileSystem.h" +#include "llvm/Support/LowLevelTypeImpl.h" +#include 
"llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <iterator> +#include <string> +#include <utility> + using namespace llvm; namespace { - struct MachineVerifier { - MachineVerifier(Pass *pass, const char *b) : - PASS(pass), - Banner(b) - {} + struct MachineVerifier { + MachineVerifier(Pass *pass, const char *b) : PASS(pass), Banner(b) {} unsigned verify(MachineFunction &MF); @@ -75,11 +102,11 @@ namespace { bool isFunctionRegBankSelected; bool isFunctionSelected; - typedef SmallVector<unsigned, 16> RegVector; - typedef SmallVector<const uint32_t*, 4> RegMaskVector; - typedef DenseSet<unsigned> RegSet; - typedef DenseMap<unsigned, const MachineInstr*> RegMap; - typedef SmallPtrSet<const MachineBasicBlock*, 8> BlockSet; + using RegVector = SmallVector<unsigned, 16>; + using RegMaskVector = SmallVector<const uint32_t *, 4>; + using RegSet = DenseSet<unsigned>; + using RegMap = DenseMap<unsigned, const MachineInstr *>; + using BlockSet = SmallPtrSet<const MachineBasicBlock *, 8>; const MachineInstr *FirstTerminator; BlockSet FunctionBlocks; @@ -101,7 +128,7 @@ namespace { struct BBInfo { // Is this MBB reachable from the MF entry point? - bool reachable; + bool reachable = false; // Vregs that must be live in because they are used without being // defined. Map value is the user. @@ -126,7 +153,7 @@ namespace { // Set versions of block's predecessor and successor lists. BlockSet Preds, Succs; - BBInfo() : reachable(false) {} + BBInfo() = default; // Add register to vregsPassed if it belongs there. Return true if // anything changed. 
@@ -237,7 +264,7 @@ namespace { void markReachable(const MachineBasicBlock *MBB); void calcRegsPassed(); - void checkPHIOps(const MachineBasicBlock *MBB); + void checkPHIOps(const MachineBasicBlock &MBB); void calcRegsRequired(); void verifyLiveVariables(); @@ -259,6 +286,7 @@ namespace { struct MachineVerifierPass : public MachineFunctionPass { static char ID; // Pass ID, replacement for typeid + const std::string Banner; MachineVerifierPass(std::string banner = std::string()) @@ -279,9 +307,10 @@ namespace { } }; -} +} // end anonymous namespace char MachineVerifierPass::ID = 0; + INITIALIZE_PASS(MachineVerifierPass, "machineverifier", "Verify generated machine code", false, false) @@ -442,9 +471,8 @@ void MachineVerifier::report(const char *msg, const MachineFunction *MF) { void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) { assert(MBB); report(msg, MBB->getParent()); - errs() << "- basic block: BB#" << MBB->getNumber() - << ' ' << MBB->getName() - << " (" << (const void*)MBB << ')'; + errs() << "- basic block: " << printMBBReference(*MBB) << ' ' + << MBB->getName() << " (" << (const void *)MBB << ')'; if (Indexes) errs() << " [" << Indexes->getMBBStartIdx(MBB) << ';' << Indexes->getMBBEndIdx(MBB) << ')'; @@ -499,14 +527,14 @@ void MachineVerifier::report_context_liverange(const LiveRange &LR) const { } void MachineVerifier::report_context_vreg(unsigned VReg) const { - errs() << "- v. register: " << PrintReg(VReg, TRI) << '\n'; + errs() << "- v. 
register: " << printReg(VReg, TRI) << '\n'; } void MachineVerifier::report_context_vreg_regunit(unsigned VRegOrUnit) const { if (TargetRegisterInfo::isVirtualRegister(VRegOrUnit)) { report_context_vreg(VRegOrUnit); } else { - errs() << "- regunit: " << PrintRegUnit(VRegOrUnit, TRI) << '\n'; + errs() << "- regunit: " << printRegUnit(VRegOrUnit, TRI) << '\n'; } } @@ -590,8 +618,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { report("MBB has successor that isn't part of the function.", MBB); if (!MBBInfoMap[*I].Preds.count(MBB)) { report("Inconsistent CFG", MBB); - errs() << "MBB is not in the predecessor list of the successor BB#" - << (*I)->getNumber() << ".\n"; + errs() << "MBB is not in the predecessor list of the successor " + << printMBBReference(*(*I)) << ".\n"; } } @@ -602,19 +630,19 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { report("MBB has predecessor that isn't part of the function.", MBB); if (!MBBInfoMap[*I].Succs.count(MBB)) { report("Inconsistent CFG", MBB); - errs() << "MBB is not in the successor list of the predecessor BB#" - << (*I)->getNumber() << ".\n"; + errs() << "MBB is not in the successor list of the predecessor " + << printMBBReference(*(*I)) << ".\n"; } } const MCAsmInfo *AsmInfo = TM->getMCAsmInfo(); const BasicBlock *BB = MBB->getBasicBlock(); - const Function *Fn = MF->getFunction(); + const Function &F = MF->getFunction(); if (LandingPadSuccs.size() > 1 && !(AsmInfo && AsmInfo->getExceptionHandlingType() == ExceptionHandling::SjLj && BB && isa<SwitchInst>(BB->getTerminator())) && - !isFuncletEHPersonality(classifyEHPersonality(Fn->getPersonalityFn()))) + !isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) report("MBB has more than one landing pad successor", MBB); // Call AnalyzeBranch. If it succeeds, there several more conditions to check. 
@@ -926,6 +954,23 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { report("Generic instruction accessing memory must have one mem operand", MI); break; + case TargetOpcode::G_PHI: { + LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + if (!DstTy.isValid() || + !std::all_of(MI->operands_begin() + 1, MI->operands_end(), + [this, &DstTy](const MachineOperand &MO) { + if (!MO.isReg()) + return true; + LLT Ty = MRI->getType(MO.getReg()); + if (!Ty.isValid() || (Ty != DstTy)) + return false; + return true; + })) + report("Generic Instruction G_PHI has operands with incompatible/missing " + "types", + MI); + break; + } case TargetOpcode::STATEPOINT: if (!MI->getOperand(StatepointOpers::IDPos).isImm() || !MI->getOperand(StatepointOpers::NBytesPos).isImm() || @@ -1039,101 +1084,112 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { report("Two-address instruction operands must be identical", MO, MONum); // Check register classes. - if (MONum < MCID.getNumOperands() && !MO->isImplicit()) { - unsigned SubIdx = MO->getSubReg(); + unsigned SubIdx = MO->getSubReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - if (SubIdx) { - report("Illegal subregister index for physical register", MO, MONum); - return; - } + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (SubIdx) { + report("Illegal subregister index for physical register", MO, MONum); + return; + } + if (MONum < MCID.getNumOperands()) { if (const TargetRegisterClass *DRC = TII->getRegClass(MCID, MONum, TRI, *MF)) { if (!DRC->contains(Reg)) { report("Illegal physical register for instruction", MO, MONum); - errs() << TRI->getName(Reg) << " is not a " - << TRI->getRegClassName(DRC) << " register.\n"; + errs() << printReg(Reg, TRI) << " is not a " + << TRI->getRegClassName(DRC) << " register.\n"; } } - } else { - // Virtual register. 
- const TargetRegisterClass *RC = MRI->getRegClassOrNull(Reg); - if (!RC) { - // This is a generic virtual register. - - // If we're post-Select, we can't have gvregs anymore. - if (isFunctionSelected) { - report("Generic virtual register invalid in a Selected function", - MO, MONum); - return; - } + } + if (MO->isRenamable() && + ((MO->isDef() && MI->hasExtraDefRegAllocReq()) || + (MO->isUse() && MI->hasExtraSrcRegAllocReq()))) { + report("Illegal isRenamable setting for opcode with extra regalloc " + "requirements", + MO, MONum); + return; + } + } else { + // Virtual register. + const TargetRegisterClass *RC = MRI->getRegClassOrNull(Reg); + if (!RC) { + // This is a generic virtual register. + + // If we're post-Select, we can't have gvregs anymore. + if (isFunctionSelected) { + report("Generic virtual register invalid in a Selected function", + MO, MONum); + return; + } - // The gvreg must have a type and it must not have a SubIdx. - LLT Ty = MRI->getType(Reg); - if (!Ty.isValid()) { - report("Generic virtual register must have a valid type", MO, - MONum); - return; - } + // The gvreg must have a type and it must not have a SubIdx. + LLT Ty = MRI->getType(Reg); + if (!Ty.isValid()) { + report("Generic virtual register must have a valid type", MO, + MONum); + return; + } - const RegisterBank *RegBank = MRI->getRegBankOrNull(Reg); + const RegisterBank *RegBank = MRI->getRegBankOrNull(Reg); - // If we're post-RegBankSelect, the gvreg must have a bank. - if (!RegBank && isFunctionRegBankSelected) { - report("Generic virtual register must have a bank in a " - "RegBankSelected function", - MO, MONum); - return; - } + // If we're post-RegBankSelect, the gvreg must have a bank. + if (!RegBank && isFunctionRegBankSelected) { + report("Generic virtual register must have a bank in a " + "RegBankSelected function", + MO, MONum); + return; + } - // Make sure the register fits into its register bank if any. 
- if (RegBank && Ty.isValid() && - RegBank->getSize() < Ty.getSizeInBits()) { - report("Register bank is too small for virtual register", MO, - MONum); - errs() << "Register bank " << RegBank->getName() << " too small(" - << RegBank->getSize() << ") to fit " << Ty.getSizeInBits() - << "-bits\n"; - return; - } - if (SubIdx) { - report("Generic virtual register does not subregister index", MO, - MONum); - return; - } + // Make sure the register fits into its register bank if any. + if (RegBank && Ty.isValid() && + RegBank->getSize() < Ty.getSizeInBits()) { + report("Register bank is too small for virtual register", MO, + MONum); + errs() << "Register bank " << RegBank->getName() << " too small(" + << RegBank->getSize() << ") to fit " << Ty.getSizeInBits() + << "-bits\n"; + return; + } + if (SubIdx) { + report("Generic virtual register does not subregister index", MO, + MONum); + return; + } - // If this is a target specific instruction and this operand - // has register class constraint, the virtual register must - // comply to it. - if (!isPreISelGenericOpcode(MCID.getOpcode()) && - TII->getRegClass(MCID, MONum, TRI, *MF)) { - report("Virtual register does not match instruction constraint", MO, - MONum); - errs() << "Expect register class " - << TRI->getRegClassName( - TII->getRegClass(MCID, MONum, TRI, *MF)) - << " but got nothing\n"; - return; - } + // If this is a target specific instruction and this operand + // has register class constraint, the virtual register must + // comply to it. 
+ if (!isPreISelGenericOpcode(MCID.getOpcode()) && + MONum < MCID.getNumOperands() && + TII->getRegClass(MCID, MONum, TRI, *MF)) { + report("Virtual register does not match instruction constraint", MO, + MONum); + errs() << "Expect register class " + << TRI->getRegClassName( + TII->getRegClass(MCID, MONum, TRI, *MF)) + << " but got nothing\n"; + return; + } - break; + break; + } + if (SubIdx) { + const TargetRegisterClass *SRC = + TRI->getSubClassWithSubReg(RC, SubIdx); + if (!SRC) { + report("Invalid subregister index for virtual register", MO, MONum); + errs() << "Register class " << TRI->getRegClassName(RC) + << " does not support subreg index " << SubIdx << "\n"; + return; } - if (SubIdx) { - const TargetRegisterClass *SRC = - TRI->getSubClassWithSubReg(RC, SubIdx); - if (!SRC) { - report("Invalid subregister index for virtual register", MO, MONum); - errs() << "Register class " << TRI->getRegClassName(RC) - << " does not support subreg index " << SubIdx << "\n"; - return; - } - if (RC != SRC) { - report("Invalid register class for subregister index", MO, MONum); - errs() << "Register class " << TRI->getRegClassName(RC) - << " does not fully support subreg index " << SubIdx << "\n"; - return; - } + if (RC != SRC) { + report("Invalid register class for subregister index", MO, MONum); + errs() << "Register class " << TRI->getRegClassName(RC) + << " does not fully support subreg index " << SubIdx << "\n"; + return; } + } + if (MONum < MCID.getNumOperands()) { if (const TargetRegisterClass *DRC = TII->getRegClass(MCID, MONum, TRI, *MF)) { if (SubIdx) { @@ -1449,8 +1505,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { } } -void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) { -} +void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) {} // This function gets called after visiting all instructions in a bundle. The // argument points to the bundle header. 
@@ -1559,32 +1614,66 @@ void MachineVerifier::calcRegsRequired() { // Check PHI instructions at the beginning of MBB. It is assumed that // calcRegsPassed has been run so BBInfo::isLiveOut is valid. -void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) { +void MachineVerifier::checkPHIOps(const MachineBasicBlock &MBB) { + BBInfo &MInfo = MBBInfoMap[&MBB]; + SmallPtrSet<const MachineBasicBlock*, 8> seen; - for (const auto &BBI : *MBB) { - if (!BBI.isPHI()) + for (const MachineInstr &Phi : MBB) { + if (!Phi.isPHI()) break; seen.clear(); - for (unsigned i = 1, e = BBI.getNumOperands(); i != e; i += 2) { - unsigned Reg = BBI.getOperand(i).getReg(); - const MachineBasicBlock *Pre = BBI.getOperand(i + 1).getMBB(); - if (!Pre->isSuccessor(MBB)) + const MachineOperand &MODef = Phi.getOperand(0); + if (!MODef.isReg() || !MODef.isDef()) { + report("Expected first PHI operand to be a register def", &MODef, 0); + continue; + } + if (MODef.isTied() || MODef.isImplicit() || MODef.isInternalRead() || + MODef.isEarlyClobber() || MODef.isDebug()) + report("Unexpected flag on PHI operand", &MODef, 0); + unsigned DefReg = MODef.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(DefReg)) + report("Expected first PHI operand to be a virtual register", &MODef, 0); + + for (unsigned I = 1, E = Phi.getNumOperands(); I != E; I += 2) { + const MachineOperand &MO0 = Phi.getOperand(I); + if (!MO0.isReg()) { + report("Expected PHI operand to be a register", &MO0, I); + continue; + } + if (MO0.isImplicit() || MO0.isInternalRead() || MO0.isEarlyClobber() || + MO0.isDebug() || MO0.isTied()) + report("Unexpected flag on PHI operand", &MO0, I); + + const MachineOperand &MO1 = Phi.getOperand(I + 1); + if (!MO1.isMBB()) { + report("Expected PHI operand to be a basic block", &MO1, I + 1); + continue; + } + + const MachineBasicBlock &Pre = *MO1.getMBB(); + if (!Pre.isSuccessor(&MBB)) { + report("PHI input is not a predecessor block", &MO1, I + 1); continue; - seen.insert(Pre); - BBInfo 
&PrInfo = MBBInfoMap[Pre]; - if (PrInfo.reachable && !PrInfo.isLiveOut(Reg)) - report("PHI operand is not live-out from predecessor", - &BBI.getOperand(i), i); + } + + if (MInfo.reachable) { + seen.insert(&Pre); + BBInfo &PrInfo = MBBInfoMap[&Pre]; + if (!MO0.isUndef() && PrInfo.reachable && + !PrInfo.isLiveOut(MO0.getReg())) + report("PHI operand is not live-out from predecessor", &MO0, I); + } } // Did we see all predecessors? - for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(), - PrE = MBB->pred_end(); PrI != PrE; ++PrI) { - if (!seen.count(*PrI)) { - report("Missing PHI operand", &BBI); - errs() << "BB#" << (*PrI)->getNumber() - << " is a predecessor according to the CFG.\n"; + if (MInfo.reachable) { + for (MachineBasicBlock *Pred : MBB.predecessors()) { + if (!seen.count(Pred)) { + report("Missing PHI operand", &Phi); + errs() << printMBBReference(*Pred) + << " is a predecessor according to the CFG.\n"; + } } } } @@ -1593,15 +1682,8 @@ void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) { void MachineVerifier::visitMachineFunctionAfter() { calcRegsPassed(); - for (const auto &MBB : *MF) { - BBInfo &MInfo = MBBInfoMap[&MBB]; - - // Skip unreachable MBBs. 
- if (!MInfo.reachable) - continue; - - checkPHIOps(&MBB); - } + for (const MachineBasicBlock &MBB : *MF) + checkPHIOps(MBB); // Now check liveness info if available calcRegsRequired(); @@ -1614,8 +1696,8 @@ void MachineVerifier::visitMachineFunctionAfter() { ++I) if (MInfo.regsKilled.count(*I)) { report("Virtual register killed in block, but needed live out.", &MBB); - errs() << "Virtual register " << PrintReg(*I) - << " is used after the block.\n"; + errs() << "Virtual register " << printReg(*I) + << " is used after the block.\n"; } } @@ -1647,14 +1729,14 @@ void MachineVerifier::verifyLiveVariables() { if (MInfo.vregsRequired.count(Reg)) { if (!VI.AliveBlocks.test(MBB.getNumber())) { report("LiveVariables: Block missing from AliveBlocks", &MBB); - errs() << "Virtual register " << PrintReg(Reg) - << " must be live through the block.\n"; + errs() << "Virtual register " << printReg(Reg) + << " must be live through the block.\n"; } } else { if (VI.AliveBlocks.test(MBB.getNumber())) { report("LiveVariables: Block should not be in AliveBlocks", &MBB); - errs() << "Virtual register " << PrintReg(Reg) - << " is not needed live through the block.\n"; + errs() << "Virtual register " << printReg(Reg) + << " is not needed live through the block.\n"; } } } @@ -1672,7 +1754,7 @@ void MachineVerifier::verifyLiveIntervals() { if (!LiveInts->hasInterval(Reg)) { report("Missing live interval for virtual register", MF); - errs() << PrintReg(Reg, TRI) << " still has defs or uses\n"; + errs() << printReg(Reg, TRI) << " still has defs or uses\n"; continue; } @@ -1887,7 +1969,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, if (MOI->isDef()) { if (Sub != 0) { hasSubRegDef = true; - // An operand vreg0:sub0<def> reads vreg0:sub1..n. Invert the lane + // An operand %0:sub0 reads %0:sub1..n. Invert the lane // mask for subregister defs. Read-undef defs will be handled by // readsReg below. 
SLM = ~SLM; @@ -1935,7 +2017,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, // Skip this block. ++MFI; } - for (;;) { + while (true) { assert(LiveInts->isLiveInToMBB(LR, &*MFI)); // We don't know how to track physregs into a landing pad. if (!TargetRegisterInfo::isVirtualRegister(Reg) && @@ -1964,8 +2046,8 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, report("Register not marked live out of predecessor", *PI); report_context(LR, Reg, LaneMask); report_context(*VNI); - errs() << " live into BB#" << MFI->getNumber() - << '@' << LiveInts->getMBBStartIdx(&*MFI) << ", not live before " + errs() << " live into " << printMBBReference(*MFI) << '@' + << LiveInts->getMBBStartIdx(&*MFI) << ", not live before " << PEnd << '\n'; continue; } @@ -1974,9 +2056,9 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, if (!IsPHI && PVNI != VNI) { report("Different value live out of predecessor", *PI); report_context(LR, Reg, LaneMask); - errs() << "Valno #" << PVNI->id << " live out of BB#" - << (*PI)->getNumber() << '@' << PEnd << "\nValno #" << VNI->id - << " live into BB#" << MFI->getNumber() << '@' + errs() << "Valno #" << PVNI->id << " live out of " + << printMBBReference(*(*PI)) << '@' << PEnd << "\nValno #" + << VNI->id << " live into " << printMBBReference(*MFI) << '@' << LiveInts->getMBBStartIdx(&*MFI) << '\n'; } } @@ -2041,23 +2123,25 @@ void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { } namespace { + // FrameSetup and FrameDestroy can have zero adjustment, so using a single // integer, we can't tell whether it is a FrameSetup or FrameDestroy if the // value is zero. // We use a bool plus an integer to capture the stack state. 
struct StackStateOfBB { - StackStateOfBB() : EntryValue(0), ExitValue(0), EntryIsSetup(false), - ExitIsSetup(false) { } + StackStateOfBB() = default; StackStateOfBB(int EntryVal, int ExitVal, bool EntrySetup, bool ExitSetup) : EntryValue(EntryVal), ExitValue(ExitVal), EntryIsSetup(EntrySetup), - ExitIsSetup(ExitSetup) { } + ExitIsSetup(ExitSetup) {} + // Can be negative, which means we are setting up a frame. - int EntryValue; - int ExitValue; - bool EntryIsSetup; - bool ExitIsSetup; + int EntryValue = 0; + int ExitValue = 0; + bool EntryIsSetup = false; + bool ExitIsSetup = false; }; -} + +} // end anonymous namespace /// Make sure on every path through the CFG, a FrameSetup <n> is always followed /// by a FrameDestroy <n>, stack adjustments are identical on all @@ -2073,8 +2157,8 @@ void MachineVerifier::verifyStackFrame() { df_iterator_default_set<const MachineBasicBlock*> Reachable; // Visit the MBBs in DFS order. - for (df_ext_iterator<const MachineFunction*, - df_iterator_default_set<const MachineBasicBlock*> > + for (df_ext_iterator<const MachineFunction *, + df_iterator_default_set<const MachineBasicBlock *>> DFI = df_ext_begin(MF, Reachable), DFE = df_ext_end(MF, Reachable); DFI != DFE; ++DFI) { const MachineBasicBlock *MBB = *DFI; @@ -2125,11 +2209,11 @@ void MachineVerifier::verifyStackFrame() { (SPState[(*I)->getNumber()].ExitValue != BBState.EntryValue || SPState[(*I)->getNumber()].ExitIsSetup != BBState.EntryIsSetup)) { report("The exit stack state of a predecessor is inconsistent.", MBB); - errs() << "Predecessor BB#" << (*I)->getNumber() << " has exit state (" - << SPState[(*I)->getNumber()].ExitValue << ", " - << SPState[(*I)->getNumber()].ExitIsSetup - << "), while BB#" << MBB->getNumber() << " has entry state (" - << BBState.EntryValue << ", " << BBState.EntryIsSetup << ").\n"; + errs() << "Predecessor " << printMBBReference(*(*I)) + << " has exit state (" << SPState[(*I)->getNumber()].ExitValue + << ", " << SPState[(*I)->getNumber()].ExitIsSetup 
<< "), while " + << printMBBReference(*MBB) << " has entry state (" + << BBState.EntryValue << ", " << BBState.EntryIsSetup << ").\n"; } } @@ -2141,11 +2225,11 @@ void MachineVerifier::verifyStackFrame() { (SPState[(*I)->getNumber()].EntryValue != BBState.ExitValue || SPState[(*I)->getNumber()].EntryIsSetup != BBState.ExitIsSetup)) { report("The entry stack state of a successor is inconsistent.", MBB); - errs() << "Successor BB#" << (*I)->getNumber() << " has entry state (" - << SPState[(*I)->getNumber()].EntryValue << ", " - << SPState[(*I)->getNumber()].EntryIsSetup - << "), while BB#" << MBB->getNumber() << " has exit state (" - << BBState.ExitValue << ", " << BBState.ExitIsSetup << ").\n"; + errs() << "Successor " << printMBBReference(*(*I)) + << " has entry state (" << SPState[(*I)->getNumber()].EntryValue + << ", " << SPState[(*I)->getNumber()].EntryIsSetup << "), while " + << printMBBReference(*MBB) << " has exit state (" + << BBState.ExitValue << ", " << BBState.ExitIsSetup << ").\n"; } } diff --git a/lib/CodeGen/MacroFusion.cpp b/lib/CodeGen/MacroFusion.cpp index 633a853b2c74..e7f426c469a0 100644 --- a/lib/CodeGen/MacroFusion.cpp +++ b/lib/CodeGen/MacroFusion.cpp @@ -19,10 +19,10 @@ #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleDAGMutation.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #define DEBUG_TYPE "machine-scheduler" @@ -33,42 +33,74 @@ using namespace llvm; static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden, cl::desc("Enable scheduling for macro fusion."), cl::init(true)); -static void fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU, +static bool isHazard(const SDep &Dep) { + return Dep.getKind() == SDep::Anti || Dep.getKind() == SDep::Output; +} + +static bool fuseInstructionPair(ScheduleDAGMI &DAG, SUnit 
&FirstSU, SUnit &SecondSU) { + // Check that neither instr is already paired with another along the edge + // between them. + for (SDep &SI : FirstSU.Succs) + if (SI.isCluster()) + return false; + + for (SDep &SI : SecondSU.Preds) + if (SI.isCluster()) + return false; + // Though the reachability checks above could be made more generic, + // perhaps as part of ScheduleDAGMI::addEdge(), since such edges are valid, + // the extra computation cost makes it less interesting in general cases. + // Create a single weak edge between the adjacent instrs. The only effect is // to cause bottom-up scheduling to heavily prioritize the clustered instrs. - DAG.addEdge(&SecondSU, SDep(&FirstSU, SDep::Cluster)); + if (!DAG.addEdge(&SecondSU, SDep(&FirstSU, SDep::Cluster))) + return false; - // Adjust the latency between the anchor instr and its - // predecessors. - for (SDep &IDep : SecondSU.Preds) - if (IDep.getSUnit() == &FirstSU) - IDep.setLatency(0); + // Adjust the latency between both instrs. + for (SDep &SI : FirstSU.Succs) + if (SI.getSUnit() == &SecondSU) + SI.setLatency(0); - // Adjust the latency between the dependent instr and its - // predecessors. - for (SDep &IDep : FirstSU.Succs) - if (IDep.getSUnit() == &SecondSU) - IDep.setLatency(0); + for (SDep &SI : SecondSU.Preds) + if (SI.getSUnit() == &FirstSU) + SI.setLatency(0); - DEBUG(dbgs() << DAG.MF.getName() << "(): Macro fuse "; + DEBUG(dbgs() << "Macro fuse: "; FirstSU.print(dbgs(), &DAG); dbgs() << " - "; SecondSU.print(dbgs(), &DAG); dbgs() << " / "; dbgs() << DAG.TII->getName(FirstSU.getInstr()->getOpcode()) << " - " << DAG.TII->getName(SecondSU.getInstr()->getOpcode()) << '\n'; ); + // Make data dependencies from the FirstSU also dependent on the SecondSU to + // prevent them from being scheduled between the FirstSU and the SecondSU. if (&SecondSU != &DAG.ExitSU) - // Make instructions dependent on FirstSU also dependent on SecondSU to - // prevent them from being scheduled between FirstSU and and SecondSU. 
for (const SDep &SI : FirstSU.Succs) { - if (SI.getSUnit() == &SecondSU) + SUnit *SU = SI.getSUnit(); + if (SI.isWeak() || isHazard(SI) || + SU == &DAG.ExitSU || SU == &SecondSU || SU->isPred(&SecondSU)) + continue; + DEBUG(dbgs() << " Bind "; + SecondSU.print(dbgs(), &DAG); dbgs() << " - "; + SU->print(dbgs(), &DAG); dbgs() << '\n';); + DAG.addEdge(SU, SDep(&SecondSU, SDep::Artificial)); + } + + // Make the FirstSU also dependent on the dependencies of the SecondSU to + // prevent them from being scheduled between the FirstSU and the SecondSU. + if (&FirstSU != &DAG.EntrySU) + for (const SDep &SI : SecondSU.Preds) { + SUnit *SU = SI.getSUnit(); + if (SI.isWeak() || isHazard(SI) || &FirstSU == SU || FirstSU.isSucc(SU)) continue; - DEBUG(dbgs() << " Copy Succ "; - SI.getSUnit()->print(dbgs(), &DAG); dbgs() << '\n';); - DAG.addEdge(SI.getSUnit(), SDep(&SecondSU, SDep::Artificial)); + DEBUG(dbgs() << " Bind "; + SU->print(dbgs(), &DAG); dbgs() << " - "; + FirstSU.print(dbgs(), &DAG); dbgs() << '\n';); + DAG.addEdge(&FirstSU, SDep(SU, SDep::Artificial)); } ++NumFused; + return true; } namespace { @@ -116,9 +148,8 @@ bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGMI &DAG, SUnit &AnchorSU) { // Explorer for fusion candidates among the dependencies of the anchor instr. for (SDep &Dep : AnchorSU.Preds) { - // Ignore dependencies that don't enforce ordering. - if (Dep.getKind() == SDep::Anti || Dep.getKind() == SDep::Output || - Dep.isWeak()) + // Ignore dependencies other than data or strong ordering. 
+ if (Dep.isWeak() || isHazard(Dep)) continue; SUnit &DepSU = *Dep.getSUnit(); @@ -129,8 +160,8 @@ bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGMI &DAG, SUnit &AnchorSU) { if (!shouldScheduleAdjacent(TII, ST, DepMI, AnchorMI)) continue; - fuseInstructionPair(DAG, DepSU, AnchorSU); - return true; + if (fuseInstructionPair(DAG, DepSU, AnchorSU)) + return true; } return false; diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp index f7aeb4204c5b..8972867ba083 100644 --- a/lib/CodeGen/OptimizePHIs.cpp +++ b/lib/CodeGen/OptimizePHIs.cpp @@ -1,4 +1,4 @@ -//===-- OptimizePHIs.cpp - Optimize machine instruction PHIs --------------===// +//===- OptimizePHIs.cpp - Optimize machine instruction PHIs ---------------===// // // The LLVM Compiler Infrastructure // @@ -14,13 +14,17 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/IR/Function.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Pass.h" +#include <cassert> + using namespace llvm; #define DEBUG_TYPE "opt-phis" @@ -29,12 +33,14 @@ STATISTIC(NumPHICycles, "Number of PHI cycles replaced"); STATISTIC(NumDeadPHICycles, "Number of dead PHI cycles"); namespace { + class OptimizePHIs : public MachineFunctionPass { MachineRegisterInfo *MRI; const TargetInstrInfo *TII; public: static char ID; // Pass identification + OptimizePHIs() : MachineFunctionPass(ID) { initializeOptimizePHIsPass(*PassRegistry::getPassRegistry()); } @@ -47,23 +53,26 @@ namespace { } private: - typedef SmallPtrSet<MachineInstr*, 16> InstrSet; - typedef 
SmallPtrSetIterator<MachineInstr*> InstrSetIterator; + using InstrSet = SmallPtrSet<MachineInstr *, 16>; + using InstrSetIterator = SmallPtrSetIterator<MachineInstr *>; bool IsSingleValuePHICycle(MachineInstr *MI, unsigned &SingleValReg, InstrSet &PHIsInCycle); bool IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle); bool OptimizeBB(MachineBasicBlock &MBB); }; -} + +} // end anonymous namespace char OptimizePHIs::ID = 0; + char &llvm::OptimizePHIsID = OptimizePHIs::ID; + INITIALIZE_PASS(OptimizePHIs, DEBUG_TYPE, "Optimize machine instruction PHIs", false, false) bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) { - if (skipFunction(*Fn.getFunction())) + if (skipFunction(Fn.getFunction())) return false; MRI = &Fn.getRegInfo(); @@ -144,7 +153,7 @@ bool OptimizePHIs::IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle) { if (PHIsInCycle.size() == 16) return false; - for (MachineInstr &UseMI : MRI->use_instructions(DstReg)) { + for (MachineInstr &UseMI : MRI->use_nodbg_instructions(DstReg)) { if (!UseMI.isPHI() || !IsDeadPHICycle(&UseMI, PHIsInCycle)) return false; } diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index 9c898fa40d7e..54c5a940275d 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -1,4 +1,4 @@ -//===-- PhiElimination.cpp - Eliminate PHI nodes by inserting copies ------===// +//===- PhiElimination.cpp - Eliminate PHI nodes by inserting copies -------===// // // The LLVM Compiler Infrastructure // @@ -14,24 +14,35 @@ //===----------------------------------------------------------------------===// #include "PHIEliminationUtils.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveVariables.h" +#include 
"llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/IR/Function.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" -#include <algorithm> +#include <cassert> +#include <iterator> +#include <utility> + using namespace llvm; #define DEBUG_TYPE "phi-node-elimination" @@ -51,6 +62,7 @@ static cl::opt<bool> NoPhiElimLiveOutEarlyExit( cl::desc("Do not use an early exit if isLiveOutPastPHIs returns true.")); namespace { + class PHIElimination : public MachineFunctionPass { MachineRegisterInfo *MRI; // Machine register information LiveVariables *LV; @@ -58,6 +70,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid + PHIElimination() : MachineFunctionPass(ID) { initializePHIEliminationPass(*PassRegistry::getPassRegistry()); } @@ -68,8 +81,8 @@ namespace { private: /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions /// in predecessor basic blocks. - /// bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB); + void LowerPHINode(MachineBasicBlock &MBB, MachineBasicBlock::iterator LastPHIIt); @@ -78,7 +91,6 @@ namespace { /// register which is used in a PHI node. We map that to the BB the /// vreg is coming from. 
This is used later to determine when the vreg /// is killed in the BB. - /// void analyzePHINodes(const MachineFunction& Fn); /// Split critical edges where necessary for good coalescer performance. @@ -90,8 +102,8 @@ namespace { bool isLiveIn(unsigned Reg, const MachineBasicBlock *MBB); bool isLiveOutPastPHIs(unsigned Reg, const MachineBasicBlock *MBB); - typedef std::pair<unsigned, unsigned> BBVRegPair; - typedef DenseMap<BBVRegPair, unsigned> VRegPHIUse; + using BBVRegPair = std::pair<unsigned, unsigned>; + using VRegPHIUse = DenseMap<BBVRegPair, unsigned>; VRegPHIUse VRegPHIUseCount; @@ -99,17 +111,19 @@ namespace { SmallPtrSet<MachineInstr*, 4> ImpDefs; // Map reusable lowered PHI node -> incoming join register. - typedef DenseMap<MachineInstr*, unsigned, - MachineInstrExpressionTrait> LoweredPHIMap; + using LoweredPHIMap = + DenseMap<MachineInstr*, unsigned, MachineInstrExpressionTrait>; LoweredPHIMap LoweredPHIs; }; -} + +} // end anonymous namespace STATISTIC(NumLowered, "Number of phis lowered"); STATISTIC(NumCriticalEdgesSplit, "Number of critical edges split"); STATISTIC(NumReused, "Number of reused lowered phis"); char PHIElimination::ID = 0; + char& llvm::PHIEliminationID = PHIElimination::ID; INITIALIZE_PASS_BEGIN(PHIElimination, DEBUG_TYPE, @@ -182,7 +196,6 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in /// predecessor basic blocks. -/// bool PHIElimination::EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB) { if (MBB.empty() || !MBB.front().isPHI()) @@ -219,9 +232,7 @@ static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi, return true; } - -/// LowerPHINode - Lower the PHI node at the top of the specified block, -/// +/// LowerPHINode - Lower the PHI node at the top of the specified block. 
void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, MachineBasicBlock::iterator LastPHIIt) { ++NumLowered; @@ -259,7 +270,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, IncomingReg = entry; reusedIncoming = true; ++NumReused; - DEBUG(dbgs() << "Reusing " << PrintReg(IncomingReg) << " for " << *MPhi); + DEBUG(dbgs() << "Reusing " << printReg(IncomingReg) << " for " << *MPhi); } else { const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg); entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC); @@ -534,7 +545,6 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, /// particular, we want to map the number of uses of a virtual register which is /// used in a PHI node. We map that to the BB the vreg is coming from. This is /// used later to determine when the vreg is killed in the BB. -/// void PHIElimination::analyzePHINodes(const MachineFunction& MF) { for (const auto &MBB : MF) for (const auto &BBI : MBB) { @@ -583,9 +593,9 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, if (!ShouldSplit && !NoPhiElimLiveOutEarlyExit) continue; if (ShouldSplit) { - DEBUG(dbgs() << PrintReg(Reg) << " live-out before critical edge BB#" - << PreMBB->getNumber() << " -> BB#" << MBB.getNumber() - << ": " << *BBI); + DEBUG(dbgs() << printReg(Reg) << " live-out before critical edge " + << printMBBReference(*PreMBB) << " -> " + << printMBBReference(MBB) << ": " << *BBI); } // If Reg is not live-in to MBB, it means it must be live-in to some diff --git a/lib/CodeGen/ParallelCG.cpp b/lib/CodeGen/ParallelCG.cpp index 50dd44fa659f..ff8680a0540d 100644 --- a/lib/CodeGen/ParallelCG.cpp +++ b/lib/CodeGen/ParallelCG.cpp @@ -19,7 +19,6 @@ #include "llvm/IR/Module.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/ThreadPool.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/SplitModule.h" diff --git 
a/lib/CodeGen/PatchableFunction.cpp b/lib/CodeGen/PatchableFunction.cpp index 513e82716564..0957705b19bb 100644 --- a/lib/CodeGen/PatchableFunction.cpp +++ b/lib/CodeGen/PatchableFunction.cpp @@ -16,9 +16,9 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" using namespace llvm; @@ -54,11 +54,11 @@ static bool doesNotGeneratecode(const MachineInstr &MI) { } bool PatchableFunction::runOnMachineFunction(MachineFunction &MF) { - if (!MF.getFunction()->hasFnAttribute("patchable-function")) + if (!MF.getFunction().hasFnAttribute("patchable-function")) return false; #ifndef NDEBUG - Attribute PatchAttr = MF.getFunction()->getFnAttribute("patchable-function"); + Attribute PatchAttr = MF.getFunction().getFnAttribute("patchable-function"); StringRef PatchType = PatchAttr.getValueAsString(); assert(PatchType == "prologue-short-redirect" && "Only possibility today!"); #endif diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index b13f6b68c420..45078081987a 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -1,4 +1,4 @@ -//===-- PeepholeOptimizer.cpp - Peephole Optimizations --------------------===// +//===- PeepholeOptimizer.cpp - Peephole Optimizations ---------------------===// // // The LLVM Compiler Infrastructure // @@ -67,6 +67,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" @@ -74,20 +75,23 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include 
"llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCInstrDesc.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <cassert> #include <cstdint> #include <memory> @@ -170,11 +174,11 @@ namespace { } /// \brief Track Def -> Use info used for rewriting copies. - typedef SmallDenseMap<TargetInstrInfo::RegSubRegPair, ValueTrackerResult> - RewriteMapTy; + using RewriteMapTy = + SmallDenseMap<TargetInstrInfo::RegSubRegPair, ValueTrackerResult>; /// \brief Sequence of instructions that formulate recurrence cycle. - typedef SmallVector<RecurrenceInstr, 4> RecurrenceCycle; + using RecurrenceCycle = SmallVector<RecurrenceInstr, 4>; private: bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); @@ -195,6 +199,7 @@ namespace { bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, SmallSet<unsigned, 4> &ImmDefRegs, DenseMap<unsigned, MachineInstr*> &ImmDefMIs); + /// \brief Finds recurrence cycles, but only ones that formulated around /// a def operand and a use operand that are tied. If there is a use /// operand commutable with the tied use operand, find recurrence cycle @@ -254,7 +259,7 @@ namespace { /// maintained with CommutePair. 
class RecurrenceInstr { public: - typedef std::pair<unsigned, unsigned> IndexPair; + using IndexPair = std::pair<unsigned, unsigned>; RecurrenceInstr(MachineInstr *MI) : MI(MI) {} RecurrenceInstr(MachineInstr *MI, unsigned Idx1, unsigned Idx2) @@ -277,11 +282,12 @@ namespace { SmallVector<TargetInstrInfo::RegSubRegPair, 2> RegSrcs; /// Instruction using the sources in 'RegSrcs'. - const MachineInstr *Inst; + const MachineInstr *Inst = nullptr; public: - ValueTrackerResult() : Inst(nullptr) {} - ValueTrackerResult(unsigned Reg, unsigned SubReg) : Inst(nullptr) { + ValueTrackerResult() = default; + + ValueTrackerResult(unsigned Reg, unsigned SubReg) { addSource(Reg, SubReg); } @@ -350,13 +356,17 @@ namespace { class ValueTracker { private: /// The current point into the use-def chain. - const MachineInstr *Def; + const MachineInstr *Def = nullptr; + /// The index of the definition in Def. - unsigned DefIdx; + unsigned DefIdx = 0; + /// The sub register index of the definition. unsigned DefSubReg; + /// The register where the value can be found. unsigned Reg; + /// Specifiy whether or not the value tracking looks through /// complex instructions. When this is false, the value tracker /// bails on everything that is not a copy or a bitcast. @@ -365,8 +375,10 @@ namespace { /// the ValueTracker class but that would have complicated the code of /// the users of this class. bool UseAdvancedTracking; + /// MachineRegisterInfo used to perform tracking. const MachineRegisterInfo &MRI; + /// Optional TargetInstrInfo used to perform some complex /// tracking. const TargetInstrInfo *TII; @@ -374,22 +386,29 @@ namespace { /// \brief Dispatcher to the right underlying implementation of /// getNextSource. ValueTrackerResult getNextSourceImpl(); + /// \brief Specialized version of getNextSource for Copy instructions. ValueTrackerResult getNextSourceFromCopy(); + /// \brief Specialized version of getNextSource for Bitcast instructions. 
ValueTrackerResult getNextSourceFromBitcast(); + /// \brief Specialized version of getNextSource for RegSequence /// instructions. ValueTrackerResult getNextSourceFromRegSequence(); + /// \brief Specialized version of getNextSource for InsertSubreg /// instructions. ValueTrackerResult getNextSourceFromInsertSubreg(); + /// \brief Specialized version of getNextSource for ExtractSubreg /// instructions. ValueTrackerResult getNextSourceFromExtractSubreg(); + /// \brief Specialized version of getNextSource for SubregToReg /// instructions. ValueTrackerResult getNextSourceFromSubregToReg(); + /// \brief Specialized version of getNextSource for PHI instructions. ValueTrackerResult getNextSourceFromPHI(); @@ -410,7 +429,7 @@ namespace { const MachineRegisterInfo &MRI, bool UseAdvancedTracking = false, const TargetInstrInfo *TII = nullptr) - : Def(nullptr), DefIdx(0), DefSubReg(DefSubReg), Reg(Reg), + : DefSubReg(DefSubReg), Reg(Reg), UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) { if (!TargetRegisterInfo::isPhysicalRegister(Reg)) { Def = MRI.getVRegDef(Reg); @@ -453,6 +472,7 @@ namespace { } // end anonymous namespace char PeepholeOptimizer::ID = 0; + char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID; INITIALIZE_PASS_BEGIN(PeepholeOptimizer, DEBUG_TYPE, @@ -659,7 +679,7 @@ bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI, } /// \brief Check if a simpler conditional branch can be -// generated +/// generated bool PeepholeOptimizer::optimizeCondBranch(MachineInstr *MI) { return TII->optimizeCondBranch(*MI); } @@ -805,13 +825,13 @@ class CopyRewriter { protected: /// The copy-like instruction. MachineInstr &CopyLike; + /// The index of the source being rewritten. 
- unsigned CurrentSrcIdx; + unsigned CurrentSrcIdx = 0; public: - CopyRewriter(MachineInstr &MI) : CopyLike(MI), CurrentSrcIdx(0) {} - - virtual ~CopyRewriter() {} + CopyRewriter(MachineInstr &MI) : CopyLike(MI) {} + virtual ~CopyRewriter() = default; /// \brief Get the next rewritable source (SrcReg, SrcSubReg) and /// the related value that it affects (TrackReg, TrackSubReg). @@ -944,6 +964,7 @@ class UncoalescableRewriter : public CopyRewriter { protected: const TargetInstrInfo &TII; MachineRegisterInfo &MRI; + /// The number of defs in the bitcast unsigned NumDefs; @@ -958,7 +979,6 @@ public: /// All such sources need to be considered rewritable in order to /// rewrite a uncoalescable copy-like instruction. This method return /// each definition that must be checked if rewritable. - /// bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, unsigned &TrackReg, unsigned &TrackSubReg) override { @@ -1205,7 +1225,7 @@ public: } }; -} // end anonymous namespace +} // end anonymous namespace /// \brief Get the appropriated CopyRewriter for \p MI. /// \return A pointer to a dynamically allocated CopyRewriter or nullptr @@ -1433,10 +1453,10 @@ bool PeepholeOptimizer::foldImmediate( // only the first copy is considered. // // e.g. 
-// %vreg1 = COPY %vreg0 -// %vreg2 = COPY %vreg0:sub1 +// %1 = COPY %0 +// %2 = COPY %0:sub1 // -// Should replace %vreg2 uses with %vreg1:sub1 +// Should replace %2 uses with %1:sub1 bool PeepholeOptimizer::foldRedundantCopy( MachineInstr *MI, SmallSet<unsigned, 4> &CopySrcRegs, DenseMap<unsigned, MachineInstr *> &CopyMIs) { @@ -1496,7 +1516,7 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy( unsigned DstReg = MI->getOperand(0).getReg(); unsigned SrcReg = MI->getOperand(1).getReg(); if (isNAPhysCopy(SrcReg) && TargetRegisterInfo::isVirtualRegister(DstReg)) { - // %vreg = COPY %PHYSREG + // %vreg = COPY %physreg // Avoid using a datastructure which can track multiple live non-allocatable // phys->virt copies since LLVM doesn't seem to do this. NAPhysToVirtMIs.insert({SrcReg, MI}); @@ -1506,7 +1526,7 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy( if (!(TargetRegisterInfo::isVirtualRegister(SrcReg) && isNAPhysCopy(DstReg))) return false; - // %PHYSREG = COPY %vreg + // %physreg = COPY %vreg auto PrevCopy = NAPhysToVirtMIs.find(DstReg); if (PrevCopy == NAPhysToVirtMIs.end()) { // We can't remove the copy: there was an intervening clobber of the @@ -1601,16 +1621,16 @@ bool PeepholeOptimizer::findTargetRecurrence( /// from the phi. For example, if there is a recurrence of /// /// LoopHeader: -/// %vreg1 = phi(%vreg0, %vreg100) +/// %1 = phi(%0, %100) /// LoopLatch: -/// %vreg0<def, tied1> = ADD %vreg2<def, tied0>, %vreg1 +/// %0<def, tied1> = ADD %2<def, tied0>, %1 /// -/// , the fact that vreg0 and vreg2 are in the same tied operands set makes +/// , the fact that %0 and %2 are in the same tied operands set makes /// the coalescing of copy instruction generated from the phi in -/// LoopHeader(i.e. %vreg1 = COPY %vreg0) impossible, because %vreg1 and -/// %vreg2 have overlapping live range. This introduces additional move -/// instruction to the final assembly. 
However, if we commute %vreg2 and -/// %vreg1 of ADD instruction, the redundant move instruction can be +/// LoopHeader(i.e. %1 = COPY %0) impossible, because %1 and +/// %2 have overlapping live range. This introduces additional move +/// instruction to the final assembly. However, if we commute %2 and +/// %1 of ADD instruction, the redundant move instruction can be /// avoided. bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) { SmallSet<unsigned, 2> TargetRegs; @@ -1642,7 +1662,7 @@ bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) { } bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n"); @@ -1676,8 +1696,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { // Track when a non-allocatable physical register is copied to a virtual // register so that useless moves can be removed. // - // %PHYSREG is the map index; MI is the last valid `%vreg = COPY %PHYSREG` - // without any intervening re-definition of %PHYSREG. + // %physreg is the map index; MI is the last valid `%vreg = COPY %physreg` + // without any intervening re-definition of %physreg. DenseMap<unsigned, MachineInstr *> NAPhysToVirtMIs; // Set of virtual registers that are copied from. 
@@ -1847,7 +1867,6 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "Encountered load fold barrier on " << *MI << "\n"); FoldAsLoadDefCandidates.clear(); } - } } diff --git a/lib/CodeGen/PostRAHazardRecognizer.cpp b/lib/CodeGen/PostRAHazardRecognizer.cpp index 4a50d895340a..f9d4a9746e41 100644 --- a/lib/CodeGen/PostRAHazardRecognizer.cpp +++ b/lib/CodeGen/PostRAHazardRecognizer.cpp @@ -31,11 +31,11 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "post-RA-hazard-rec" diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index f2249f9e37e0..5d86faafdd85 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -25,7 +25,6 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LatencyPriorityQueue.h" #include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -34,15 +33,15 @@ #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include 
"llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "post-RA-sched" @@ -280,7 +279,7 @@ bool PostRAScheduler::enablePostRAScheduler( } bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { - if (skipFunction(*Fn.getFunction())) + if (skipFunction(Fn.getFunction())) return false; TII = Fn.getSubtarget().getInstrInfo(); @@ -322,8 +321,8 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { static int bbcnt = 0; if (bbcnt++ % DebugDiv != DebugMod) continue; - dbgs() << "*** DEBUG scheduling " << Fn.getName() - << ":BB#" << MBB.getNumber() << " ***\n"; + dbgs() << "*** DEBUG scheduling " << Fn.getName() << ":" + << printMBBReference(MBB) << " ***\n"; } #endif diff --git a/lib/CodeGen/PreISelIntrinsicLowering.cpp b/lib/CodeGen/PreISelIntrinsicLowering.cpp index fbc2bc64f425..8f88ef78828a 100644 --- a/lib/CodeGen/PreISelIntrinsicLowering.cpp +++ b/lib/CodeGen/PreISelIntrinsicLowering.cpp @@ -1,4 +1,4 @@ -//===-- PreISelIntrinsicLowering.cpp - Pre-ISel intrinsic lowering pass ---===// +//===- PreISelIntrinsicLowering.cpp - Pre-ISel intrinsic lowering pass ----===// // // The LLVM Compiler Infrastructure // @@ -16,15 +16,15 @@ #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" #include "llvm/Pass.h" +#include "llvm/Support/Casting.h" using namespace llvm; -namespace { - -bool lowerLoadRelative(Function &F) { +static bool lowerLoadRelative(Function &F) { if (F.use_empty()) return false; @@ -55,7 +55,7 @@ bool lowerLoadRelative(Function &F) { return Changed; } -bool lowerIntrinsics(Module &M) { +static bool lowerIntrinsics(Module &M) { bool Changed = false; for (Function &F : M) { if (F.getName().startswith("llvm.load.relative.")) @@ -64,23 
+64,26 @@ bool lowerIntrinsics(Module &M) { return Changed; } +namespace { + class PreISelIntrinsicLoweringLegacyPass : public ModulePass { public: static char ID; + PreISelIntrinsicLoweringLegacyPass() : ModulePass(ID) {} - bool runOnModule(Module &M) { return lowerIntrinsics(M); } + bool runOnModule(Module &M) override { return lowerIntrinsics(M); } }; +} // end anonymous namespace + char PreISelIntrinsicLoweringLegacyPass::ID; -} INITIALIZE_PASS(PreISelIntrinsicLoweringLegacyPass, "pre-isel-intrinsic-lowering", "Pre-ISel Intrinsic Lowering", false, false) -namespace llvm { -ModulePass *createPreISelIntrinsicLoweringPass() { +ModulePass *llvm::createPreISelIntrinsicLoweringPass() { return new PreISelIntrinsicLoweringLegacyPass; } @@ -91,4 +94,3 @@ PreservedAnalyses PreISelIntrinsicLoweringPass::run(Module &M, else return PreservedAnalyses::none(); } -} // End llvm namespace diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index 0118580a626a..48b48c5f6499 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -13,10 +13,10 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -154,7 +154,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) { if (WorkList.empty()) continue; - DEBUG(dbgs() << "BB#" << MFI->getNumber() << " has " << WorkList.size() + DEBUG(dbgs() << printMBBReference(*MFI) << " has " << WorkList.size() << " implicit defs.\n"); Changed = true; diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index e9f8d43fe643..a8d8ad8ac7dc 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ 
b/lib/CodeGen/PrologEpilogInserter.cpp @@ -1,4 +1,4 @@ -//===-- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function --===// +//===- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function ---===// // // The LLVM Compiler Infrastructure // @@ -16,77 +16,83 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/StackProtector.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/WinEHFuncInfo.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Pass.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include 
"llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" -#include <climits> +#include "llvm/Target/TargetOptions.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <functional> +#include <limits> +#include <utility> +#include <vector> using namespace llvm; #define DEBUG_TYPE "prologepilog" -typedef SmallVector<MachineBasicBlock *, 4> MBBVector; -static void doSpillCalleeSavedRegs(MachineFunction &MF, RegScavenger *RS, - unsigned &MinCSFrameIndex, - unsigned &MaxCXFrameIndex, - const MBBVector &SaveBlocks, - const MBBVector &RestoreBlocks); +using MBBVector = SmallVector<MachineBasicBlock *, 4>; namespace { + class PEI : public MachineFunctionPass { public: static char ID; + PEI() : MachineFunctionPass(ID) { initializePEIPass(*PassRegistry::getPassRegistry()); } void getAnalysisUsage(AnalysisUsage &AU) const override; - MachineFunctionProperties getRequiredProperties() const override { - MachineFunctionProperties MFP; - if (UsesCalleeSaves) - MFP.set(MachineFunctionProperties::Property::NoVRegs); - return MFP; - } - /// runOnMachineFunction - Insert prolog/epilog code and replace abstract /// frame indexes with appropriate references. 
- /// bool runOnMachineFunction(MachineFunction &Fn) override; private: - std::function<void(MachineFunction &MF, RegScavenger *RS, - unsigned &MinCSFrameIndex, unsigned &MaxCSFrameIndex, - const MBBVector &SaveBlocks, - const MBBVector &RestoreBlocks)> - SpillCalleeSavedRegisters; - std::function<void(MachineFunction &MF, RegScavenger &RS)> - ScavengeFrameVirtualRegs; - - bool UsesCalleeSaves = false; - RegScavenger *RS; // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved @@ -108,8 +114,12 @@ private: // FrameIndexVirtualScavenging is used. bool FrameIndexEliminationScavenging; + // Emit remarks. + MachineOptimizationRemarkEmitter *ORE = nullptr; + void calculateCallFrameInfo(MachineFunction &Fn); void calculateSaveRestoreBlocks(MachineFunction &Fn); + void spillCalleeSavedRegs(MachineFunction &MF); void calculateFrameObjectOffsets(MachineFunction &Fn); void replaceFrameIndices(MachineFunction &Fn); @@ -117,9 +127,11 @@ private: int &SPAdj); void insertPrologEpilogCode(MachineFunction &Fn); }; -} // namespace + +} // end anonymous namespace char PEI::ID = 0; + char &llvm::PrologEpilogCodeInserterID = PEI::ID; static cl::opt<unsigned> @@ -132,6 +144,7 @@ INITIALIZE_PASS_BEGIN(PEI, DEBUG_TYPE, "Prologue/Epilogue Insertion", false, INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(StackProtector) +INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass) INITIALIZE_PASS_END(PEI, DEBUG_TYPE, "Prologue/Epilogue Insertion & Frame Finalization", false, false) @@ -148,32 +161,17 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<MachineLoopInfo>(); AU.addPreserved<MachineDominatorTree>(); AU.addRequired<StackProtector>(); + AU.addRequired<MachineOptimizationRemarkEmitterPass>(); MachineFunctionPass::getAnalysisUsage(AU); } - /// StackObjSet - A set of stack object indexes -typedef SmallSetVector<int, 8> StackObjSet; +using StackObjSet = SmallSetVector<int, 
8>; /// runOnMachineFunction - Insert prolog/epilog code and replace abstract /// frame indexes with appropriate references. -/// bool PEI::runOnMachineFunction(MachineFunction &Fn) { - if (!SpillCalleeSavedRegisters) { - const TargetMachine &TM = Fn.getTarget(); - if (!TM.usesPhysRegsForPEI()) { - SpillCalleeSavedRegisters = [](MachineFunction &, RegScavenger *, - unsigned &, unsigned &, const MBBVector &, - const MBBVector &) {}; - ScavengeFrameVirtualRegs = [](MachineFunction &, RegScavenger &) {}; - } else { - SpillCalleeSavedRegisters = doSpillCalleeSavedRegs; - ScavengeFrameVirtualRegs = scavengeFrameVirtualRegs; - UsesCalleeSaves = true; - } - } - - const Function* F = Fn.getFunction(); + const Function &F = Fn.getFunction(); const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); @@ -181,6 +179,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn); FrameIndexEliminationScavenging = (RS && !FrameIndexVirtualScavenging) || TRI->requiresFrameIndexReplacementScavenging(Fn); + ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE(); // Calculate the MaxCallFrameSize and AdjustsStack variables for the // function's frame information. Also eliminates call frame pseudo @@ -192,8 +191,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { calculateSaveRestoreBlocks(Fn); // Handle CSR spilling and restoring, for targets that need it. - SpillCalleeSavedRegisters(Fn, RS, MinCSFrameIndex, MaxCSFrameIndex, - SaveBlocks, RestoreBlocks); + if (Fn.getTarget().usesPhysRegsForPEI()) + spillCalleeSavedRegs(Fn); // Allow the target machine to make final modifications to the function // before the frame layout is finalized. @@ -207,7 +206,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // called functions. 
Because of this, calculateCalleeSavedRegisters() // must be called before this function in order to set the AdjustsStack // and MaxCallFrameSize variables. - if (!F->hasFnAttribute(Attribute::Naked)) + if (!F.hasFnAttribute(Attribute::Naked)) insertPrologEpilogCode(Fn); // Replace all MO_FrameIndex operands with physical register references @@ -218,19 +217,15 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // If register scavenging is needed, as we've enabled doing it as a // post-pass, scavenge the virtual registers that frame index elimination // inserted. - if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) { - ScavengeFrameVirtualRegs(Fn, *RS); - - // Clear any vregs created by virtual scavenging. - Fn.getRegInfo().clearVirtRegs(); - } + if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) + scavengeFrameVirtualRegs(Fn, *RS); // Warn on stack size when we exceeds the given limit. MachineFrameInfo &MFI = Fn.getFrameInfo(); uint64_t StackSize = MFI.getStackSize(); if (WarnStackSize.getNumOccurrences() > 0 && WarnStackSize < StackSize) { - DiagnosticInfoStackSize DiagStackSize(*F, StackSize); - F->getContext().diagnose(DiagStackSize); + DiagnosticInfoStackSize DiagStackSize(F, StackSize); + F.getContext().diagnose(DiagStackSize); } delete RS; @@ -459,87 +454,63 @@ static void updateLiveness(MachineFunction &MF) { } } -/// insertCSRSpillsAndRestores - Insert spill and restore code for -/// callee saved registers used in the function. -/// -static void insertCSRSpillsAndRestores(MachineFunction &Fn, - const MBBVector &SaveBlocks, - const MBBVector &RestoreBlocks) { - // Get callee saved register information. - MachineFrameInfo &MFI = Fn.getFrameInfo(); - const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); - - MFI.setCalleeSavedInfoValid(true); - - // Early exit if no callee saved registers are modified! 
- if (CSI.empty()) - return; - +/// Insert spill code for the callee-saved registers used in the function. +static void insertCSRSaves(MachineBasicBlock &SaveBlock, + ArrayRef<CalleeSavedInfo> CSI) { + MachineFunction &Fn = *SaveBlock.getParent(); const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo(); const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); - MachineBasicBlock::iterator I; - - // Spill using target interface. - for (MachineBasicBlock *SaveBlock : SaveBlocks) { - I = SaveBlock->begin(); - if (!TFI->spillCalleeSavedRegisters(*SaveBlock, I, CSI, TRI)) { - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - // Insert the spill to the stack frame. - unsigned Reg = CSI[i].getReg(); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(*SaveBlock, I, Reg, true, CSI[i].getFrameIdx(), - RC, TRI); - } + + MachineBasicBlock::iterator I = SaveBlock.begin(); + if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) { + for (const CalleeSavedInfo &CS : CSI) { + // Insert the spill to the stack frame. + unsigned Reg = CS.getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC, + TRI); } - // Update the live-in information of all the blocks up to the save point. - updateLiveness(Fn); } +} - // Restore using target interface. - for (MachineBasicBlock *MBB : RestoreBlocks) { - I = MBB->end(); - - // Skip over all terminator instructions, which are part of the return - // sequence. - MachineBasicBlock::iterator I2 = I; - while (I2 != MBB->begin() && (--I2)->isTerminator()) - I = I2; - - bool AtStart = I == MBB->begin(); - MachineBasicBlock::iterator BeforeI = I; - if (!AtStart) - --BeforeI; - - // Restore all registers immediately before the return and any - // terminators that precede it. 
- if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) { - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.loadRegFromStackSlot(*MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI); - assert(I != MBB->begin() && - "loadRegFromStackSlot didn't insert any code!"); - // Insert in reverse order. loadRegFromStackSlot can insert - // multiple instructions. - if (AtStart) - I = MBB->begin(); - else { - I = BeforeI; - ++I; - } - } +/// Insert restore code for the callee-saved registers used in the function. +static void insertCSRRestores(MachineBasicBlock &RestoreBlock, + std::vector<CalleeSavedInfo> &CSI) { + MachineFunction &Fn = *RestoreBlock.getParent(); + const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo(); + const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); + const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); + + // Restore all registers immediately before the return and any + // terminators that precede it. + MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator(); + + if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) { + for (const CalleeSavedInfo &CI : reverse(CSI)) { + unsigned Reg = CI.getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI); + assert(I != RestoreBlock.begin() && + "loadRegFromStackSlot didn't insert any code!"); + // Insert in reverse order. loadRegFromStackSlot can insert + // multiple instructions. 
} } } -static void doSpillCalleeSavedRegs(MachineFunction &Fn, RegScavenger *RS, - unsigned &MinCSFrameIndex, - unsigned &MaxCSFrameIndex, - const MBBVector &SaveBlocks, - const MBBVector &RestoreBlocks) { - const Function *F = Fn.getFunction(); +void PEI::spillCalleeSavedRegs(MachineFunction &Fn) { + // We can't list this requirement in getRequiredProperties because some + // targets (WebAssembly) use virtual registers past this point, and the pass + // pipeline is set up without giving the passes a chance to look at the + // TargetMachine. + // FIXME: Find a way to express this in getRequiredProperties. + assert(Fn.getProperties().hasProperty( + MachineFunctionProperties::Property::NoVRegs)); + + const Function &F = Fn.getFunction(); const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); + MachineFrameInfo &MFI = Fn.getFrameInfo(); MinCSFrameIndex = std::numeric_limits<unsigned>::max(); MaxCSFrameIndex = 0; @@ -551,8 +522,21 @@ static void doSpillCalleeSavedRegs(MachineFunction &Fn, RegScavenger *RS, assignCalleeSavedSpillSlots(Fn, SavedRegs, MinCSFrameIndex, MaxCSFrameIndex); // Add the code to save and restore the callee saved registers. - if (!F->hasFnAttribute(Attribute::Naked)) - insertCSRSpillsAndRestores(Fn, SaveBlocks, RestoreBlocks); + if (!F.hasFnAttribute(Attribute::Naked)) { + MFI.setCalleeSavedInfoValid(true); + + std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); + if (!CSI.empty()) { + for (MachineBasicBlock *SaveBlock : SaveBlocks) { + insertCSRSaves(*SaveBlock, CSI); + // Update the live-in information of all the blocks up to the save + // point. + updateLiveness(Fn); + } + for (MachineBasicBlock *RestoreBlock : RestoreBlocks) + insertCSRRestores(*RestoreBlock, CSI); + } + } } /// AdjustStackOffset - Helper function used to adjust the stack frame offset. 
@@ -585,7 +569,6 @@ AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx, /// Compute which bytes of fixed and callee-save stack area are unused and keep /// track of them in StackBytesFree. -/// static inline void computeFreeStackSlots(MachineFrameInfo &MFI, bool StackGrowsDown, unsigned MinCSFrameIndex, unsigned MaxCSFrameIndex, @@ -626,7 +609,6 @@ computeFreeStackSlots(MachineFrameInfo &MFI, bool StackGrowsDown, /// Assign frame object to an unused portion of the stack in the fixed stack /// object range. Return true if the allocation was successful. -/// static inline bool scavengeStackSlot(MachineFrameInfo &MFI, int FrameIdx, bool StackGrowsDown, unsigned MaxAlign, BitVector &StackBytesFree) { @@ -703,7 +685,6 @@ AssignProtectedObjSet(const StackObjSet &UnassignedObjs, /// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the /// abstract stack objects. -/// void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); StackProtector *SP = &getAnalysis<StackProtector>(); @@ -825,7 +806,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { } // Retrieve the Exception Handler registration node. - int EHRegNodeFrameIndex = INT_MAX; + int EHRegNodeFrameIndex = std::numeric_limits<int>::max(); if (const WinEHFuncInfo *FuncInfo = Fn.getWinEHFuncInfo()) EHRegNodeFrameIndex = FuncInfo->EHRegNodeFrameIndex; @@ -903,7 +884,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { } // Allocate the EH registration node first if one is present. 
- if (EHRegNodeFrameIndex != INT_MAX) + if (EHRegNodeFrameIndex != std::numeric_limits<int>::max()) AdjustStackOffset(MFI, EHRegNodeFrameIndex, StackGrowsDown, Offset, MaxAlign, Skew); @@ -968,12 +949,18 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { int64_t StackSize = Offset - LocalAreaOffset; MFI.setStackSize(StackSize); NumBytesStackSpace += StackSize; + + ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "StackSize", + Fn.getFunction().getSubprogram(), + &Fn.front()) + << ore::NV("NumStackBytes", StackSize) << " stack bytes in function"; + }); } /// insertPrologEpilogCode - Scan the function for modified callee saved /// registers, insert spill code for these callee saved registers, then add /// prolog and epilog code to the function. -/// void PEI::insertPrologEpilogCode(MachineFunction &Fn) { const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); @@ -995,21 +982,24 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { if (Fn.shouldSplitStack()) { for (MachineBasicBlock *SaveBlock : SaveBlocks) TFI.adjustForSegmentedStacks(Fn, *SaveBlock); - } + // Record that there are split-stack functions, so we will emit a + // special section to tell the linker. + Fn.getMMI().setHasSplitStack(true); + } else + Fn.getMMI().setHasNosplitStack(true); // Emit additional code that is required to explicitly handle the stack in // HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The // approach is rather similar to that of Segmented Stacks, but it uses a // different conditional check and another BIF for allocating more stack // space. - if (Fn.getFunction()->getCallingConv() == CallingConv::HiPE) + if (Fn.getFunction().getCallingConv() == CallingConv::HiPE) for (MachineBasicBlock *SaveBlock : SaveBlocks) TFI.adjustForHiPEPrologue(Fn, *SaveBlock); } /// replaceFrameIndices - Replace all MO_FrameIndex operands with physical /// register references and actual offsets. 
-/// void PEI::replaceFrameIndices(MachineFunction &Fn) { const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); if (!TFI.needsFrameIndexResolution(Fn)) return; @@ -1059,7 +1049,6 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, bool InsideCallSequence = false; for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { - if (TII.isFrameInstr(*I)) { InsideCallSequence = TII.isFrameSetup(*I); SPAdj += TII.getSPAdjust(*I); @@ -1081,11 +1070,12 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, assert(i == 0 && "Frame indices can only appear as the first " "operand of a DBG_VALUE machine instruction"); unsigned Reg; - MachineOperand &Offset = MI.getOperand(1); - Offset.setImm( - Offset.getImm() + - TFI->getFrameIndexReference(Fn, MI.getOperand(0).getIndex(), Reg)); + int64_t Offset = + TFI->getFrameIndexReference(Fn, MI.getOperand(0).getIndex(), Reg); MI.getOperand(0).ChangeToRegister(Reg, false /*isDef*/); + auto *DIExpr = DIExpression::prepend(MI.getDebugExpression(), + DIExpression::NoDeref, Offset); + MI.getOperand(3).setMetadata(DIExpr); continue; } diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp index b29e62bf1aa3..86fd87450521 100644 --- a/lib/CodeGen/PseudoSourceValue.cpp +++ b/lib/CodeGen/PseudoSourceValue.cpp @@ -14,6 +14,7 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Support/ErrorHandling.h" @@ -24,7 +25,11 @@ static const char *const PSVNames[] = { "Stack", "GOT", "JumpTable", "ConstantPool", "FixedStack", "GlobalValueCallEntry", "ExternalSymbolCallEntry"}; -PseudoSourceValue::PseudoSourceValue(PSVKind Kind) : Kind(Kind) {} +PseudoSourceValue::PseudoSourceValue(PSVKind Kind, const TargetInstrInfo &TII) + : Kind(Kind) { + AddressSpace = 
TII.getAddressSpaceForPseudoSourceKind(Kind); +} + PseudoSourceValue::~PseudoSourceValue() {} @@ -75,8 +80,9 @@ void FixedStackPseudoSourceValue::printCustom(raw_ostream &OS) const { OS << "FixedStack" << FI; } -CallEntryPseudoSourceValue::CallEntryPseudoSourceValue(PSVKind Kind) - : PseudoSourceValue(Kind) {} +CallEntryPseudoSourceValue::CallEntryPseudoSourceValue( + PSVKind Kind, const TargetInstrInfo &TII) + : PseudoSourceValue(Kind, TII) {} bool CallEntryPseudoSourceValue::isConstant(const MachineFrameInfo *) const { return false; @@ -91,16 +97,20 @@ bool CallEntryPseudoSourceValue::mayAlias(const MachineFrameInfo *) const { } GlobalValuePseudoSourceValue::GlobalValuePseudoSourceValue( - const GlobalValue *GV) - : CallEntryPseudoSourceValue(GlobalValueCallEntry), GV(GV) {} - -ExternalSymbolPseudoSourceValue::ExternalSymbolPseudoSourceValue(const char *ES) - : CallEntryPseudoSourceValue(ExternalSymbolCallEntry), ES(ES) {} - -PseudoSourceValueManager::PseudoSourceValueManager() - : StackPSV(PseudoSourceValue::Stack), GOTPSV(PseudoSourceValue::GOT), - JumpTablePSV(PseudoSourceValue::JumpTable), - ConstantPoolPSV(PseudoSourceValue::ConstantPool) {} + const GlobalValue *GV, + const TargetInstrInfo &TII) + : CallEntryPseudoSourceValue(GlobalValueCallEntry, TII), GV(GV) {} +ExternalSymbolPseudoSourceValue::ExternalSymbolPseudoSourceValue( + const char *ES, const TargetInstrInfo &TII) + : CallEntryPseudoSourceValue(ExternalSymbolCallEntry, TII), ES(ES) {} + +PseudoSourceValueManager::PseudoSourceValueManager( + const TargetInstrInfo &TIInfo) + : TII(TIInfo), + StackPSV(PseudoSourceValue::Stack, TII), + GOTPSV(PseudoSourceValue::GOT, TII), + JumpTablePSV(PseudoSourceValue::JumpTable, TII), + ConstantPoolPSV(PseudoSourceValue::ConstantPool, TII) {} const PseudoSourceValue *PseudoSourceValueManager::getStack() { return &StackPSV; @@ -116,10 +126,11 @@ const PseudoSourceValue *PseudoSourceValueManager::getJumpTable() { return &JumpTablePSV; } -const PseudoSourceValue 
*PseudoSourceValueManager::getFixedStack(int FI) { +const PseudoSourceValue * +PseudoSourceValueManager::getFixedStack(int FI) { std::unique_ptr<FixedStackPseudoSourceValue> &V = FSValues[FI]; if (!V) - V = llvm::make_unique<FixedStackPseudoSourceValue>(FI); + V = llvm::make_unique<FixedStackPseudoSourceValue>(FI, TII); return V.get(); } @@ -128,7 +139,7 @@ PseudoSourceValueManager::getGlobalValueCallEntry(const GlobalValue *GV) { std::unique_ptr<const GlobalValuePseudoSourceValue> &E = GlobalCallEntries[GV]; if (!E) - E = llvm::make_unique<GlobalValuePseudoSourceValue>(GV); + E = llvm::make_unique<GlobalValuePseudoSourceValue>(GV, TII); return E.get(); } @@ -137,6 +148,6 @@ PseudoSourceValueManager::getExternalSymbolCallEntry(const char *ES) { std::unique_ptr<const ExternalSymbolPseudoSourceValue> &E = ExternalCallEntries[ES]; if (!E) - E = llvm::make_unique<ExternalSymbolPseudoSourceValue>(ES); + E = llvm::make_unique<ExternalSymbolPseudoSourceValue>(ES, TII); return E.get(); } diff --git a/lib/CodeGen/README.txt b/lib/CodeGen/README.txt index 8f19e432ab79..2fcbd1280da4 100644 --- a/lib/CodeGen/README.txt +++ b/lib/CodeGen/README.txt @@ -33,7 +33,7 @@ It also increase the likelihood the store may become dead. bb27 ... ... 
%reg1037 = ADDri %reg1039, 1 - %reg1038 = ADDrs %reg1032, %reg1039, %NOREG, 10 + %reg1038 = ADDrs %reg1032, %reg1039, %noreg, 10 Successors according to CFG: 0x8b03bf0 (#5) bb76 (0x8b03bf0, LLVM BB @0x8b032d0, ID#5): diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp index 7b4fbace2c1c..74c1592634aa 100644 --- a/lib/CodeGen/RegAllocBase.cpp +++ b/lib/CodeGen/RegAllocBase.cpp @@ -1,4 +1,4 @@ -//===-- RegAllocBase.cpp - Register Allocator Base Class ------------------===// +//===- RegAllocBase.cpp - Register Allocator Base Class -------------------===// // // The LLVM Compiler Infrastructure // @@ -14,19 +14,22 @@ #include "RegAllocBase.h" #include "Spiller.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/LiveRangeEdit.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveRegMatrix.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include <cassert> using namespace llvm; @@ -37,8 +40,8 @@ STATISTIC(NumNewQueued , "Number of new live ranges queued"); // Temporary verification option until we can put verification inside // MachineVerifier. 
static cl::opt<bool, true> -VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled), - cl::desc("Verify during register allocation")); + VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled), + cl::Hidden, cl::desc("Verify during register allocation")); const char RegAllocBase::TimerGroupName[] = "regalloc"; const char RegAllocBase::TimerGroupDescription[] = "Register Allocation"; @@ -103,7 +106,9 @@ void RegAllocBase::allocatePhysRegs() { DEBUG(dbgs() << "\nselectOrSplit " << TRI->getRegClassName(MRI->getRegClass(VirtReg->reg)) << ':' << *VirtReg << " w=" << VirtReg->weight << '\n'); - typedef SmallVector<unsigned, 4> VirtRegVec; + + using VirtRegVec = SmallVector<unsigned, 4>; + VirtRegVec SplitVRegs; unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs); diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h index d8921b5ce6db..686ffc36e049 100644 --- a/lib/CodeGen/RegAllocBase.h +++ b/lib/CodeGen/RegAllocBase.h @@ -1,4 +1,4 @@ -//===-- RegAllocBase.h - basic regalloc interface and driver --*- C++ -*---===// +//===- RegAllocBase.h - basic regalloc interface and driver -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -37,17 +37,20 @@ #ifndef LLVM_LIB_CODEGEN_REGALLOCBASE_H #define LLVM_LIB_CODEGEN_REGALLOCBASE_H -#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/RegisterClassInfo.h" namespace llvm { -template<typename T> class SmallVectorImpl; -class TargetRegisterInfo; -class VirtRegMap; +class LiveInterval; class LiveIntervals; class LiveRegMatrix; +class MachineInstr; +class MachineRegisterInfo; +template<typename T> class SmallVectorImpl; class Spiller; +class TargetRegisterInfo; +class VirtRegMap; /// RegAllocBase provides the register allocation driver and interface that can /// be extended to add interesting heuristics. @@ -57,12 +60,13 @@ class Spiller; /// assignment order. 
class RegAllocBase { virtual void anchor(); + protected: - const TargetRegisterInfo *TRI; - MachineRegisterInfo *MRI; - VirtRegMap *VRM; - LiveIntervals *LIS; - LiveRegMatrix *Matrix; + const TargetRegisterInfo *TRI = nullptr; + MachineRegisterInfo *MRI = nullptr; + VirtRegMap *VRM = nullptr; + LiveIntervals *LIS = nullptr; + LiveRegMatrix *Matrix = nullptr; RegisterClassInfo RegClassInfo; /// Inst which is a def of an original reg and whose defs are already all @@ -71,10 +75,8 @@ protected: /// always available for the remat of all the siblings of the original reg. SmallPtrSet<MachineInstr *, 32> DeadRemats; - RegAllocBase() - : TRI(nullptr), MRI(nullptr), VRM(nullptr), LIS(nullptr), Matrix(nullptr) {} - - virtual ~RegAllocBase() {} + RegAllocBase() = default; + virtual ~RegAllocBase() = default; // A RegAlloc pass should call this before allocatePhysRegs. void init(VirtRegMap &vrm, LiveIntervals &lis, LiveRegMatrix &mat); @@ -120,4 +122,4 @@ private: } // end namespace llvm -#endif +#endif // LLVM_LIB_CODEGEN_REGALLOCBASE_H diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp index 774306154a89..6e273277804b 100644 --- a/lib/CodeGen/RegAllocBasic.cpp +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -18,7 +18,7 @@ #include "Spiller.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/CalcSpillWeights.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveRegMatrix.h" #include "llvm/CodeGen/LiveStackAnalysis.h" @@ -29,11 +29,11 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/PassAnalysisSupport.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetRegisterInfo.h" #include <cstdlib> #include <queue> @@ -143,14 +143,17 
@@ INITIALIZE_PASS_END(RABasic, "regallocbasic", "Basic Register Allocator", false, false) bool RABasic::LRE_CanEraseVirtReg(unsigned VirtReg) { + LiveInterval &LI = LIS->getInterval(VirtReg); if (VRM->hasPhys(VirtReg)) { - LiveInterval &LI = LIS->getInterval(VirtReg); Matrix->unassign(LI); aboutToRemoveInterval(LI); return true; } // Unassigned virtreg is probably in the priority queue. // RegAllocBase will erase it after dequeueing. + // Nonetheless, clear the live-range so that the debug + // dump will show the right state for that VirtReg. + LI.clear(); return false; } @@ -216,8 +219,8 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, Intfs.push_back(Intf); } } - DEBUG(dbgs() << "spilling " << TRI->getName(PhysReg) << - " interferences with " << VirtReg << "\n"); + DEBUG(dbgs() << "spilling " << printReg(PhysReg, TRI) + << " interferences with " << VirtReg << "\n"); assert(!Intfs.empty() && "expected interference"); // Spill each interfering vreg allocated to PhysReg or an alias. diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index d5538be4bba2..6a5282cbbbff 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -1,4 +1,4 @@ -//===-- RegAllocFast.cpp - A fast register allocator for debug code -------===// +//===- RegAllocFast.cpp - A fast register allocator for debug code --------===// // // The LLVM Compiler Infrastructure // @@ -7,32 +7,47 @@ // //===----------------------------------------------------------------------===// // -// This register allocator allocates registers to a basic block at a time, -// attempting to keep values in registers and reusing registers as appropriate. +/// \file This register allocator allocates registers to a basic block at a +/// time, attempting to keep values in registers and reusing registers as +/// appropriate. 
// //===----------------------------------------------------------------------===// +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IndexedMap.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SparseSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/RegisterClassInfo.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Metadata.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" -#include <algorithm> +#include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <tuple> +#include <vector> + using namespace llvm; #define DEBUG_TYPE "regalloc" @@ -45,109 +60,106 @@ static RegisterRegAlloc fastRegAlloc("fast", "fast register allocator", createFastRegisterAllocator); namespace { - class RAFast : public MachineFunctionPass { + + class RegAllocFast : public MachineFunctionPass { public: static char ID; - RAFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1), - isBulkSpilling(false) {} + + RegAllocFast() : MachineFunctionPass(ID), 
StackSlotForVirtReg(-1) {} private: - MachineFunction *MF; + MachineFrameInfo *MFI; MachineRegisterInfo *MRI; const TargetRegisterInfo *TRI; const TargetInstrInfo *TII; RegisterClassInfo RegClassInfo; - // Basic block currently being allocated. + /// Basic block currently being allocated. MachineBasicBlock *MBB; - // StackSlotForVirtReg - Maps virtual regs to the frame index where these - // values are spilled. + /// Maps virtual regs to the frame index where these values are spilled. IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg; - // Everything we know about a live virtual register. + /// Everything we know about a live virtual register. struct LiveReg { - MachineInstr *LastUse; // Last instr to use reg. - unsigned VirtReg; // Virtual register number. - unsigned PhysReg; // Currently held here. - unsigned short LastOpNum; // OpNum on LastUse. - bool Dirty; // Register needs spill. + MachineInstr *LastUse = nullptr; ///< Last instr to use reg. + unsigned VirtReg; ///< Virtual register number. + MCPhysReg PhysReg = 0; ///< Currently held here. + unsigned short LastOpNum = 0; ///< OpNum on LastUse. + bool Dirty = false; ///< Register needs spill. - explicit LiveReg(unsigned v) - : LastUse(nullptr), VirtReg(v), PhysReg(0), LastOpNum(0), Dirty(false){} + explicit LiveReg(unsigned v) : VirtReg(v) {} unsigned getSparseSetIndex() const { return TargetRegisterInfo::virtReg2Index(VirtReg); } }; - typedef SparseSet<LiveReg> LiveRegMap; + using LiveRegMap = SparseSet<LiveReg>; - // LiveVirtRegs - This map contains entries for each virtual register - // that is currently available in a physical register. + /// This map contains entries for each virtual register that is currently + /// available in a physical register. LiveRegMap LiveVirtRegs; - DenseMap<unsigned, SmallVector<MachineInstr *, 4> > LiveDbgValueMap; + DenseMap<unsigned, SmallVector<MachineInstr *, 4>> LiveDbgValueMap; - // RegState - Track the state of a physical register. 
+ /// Track the state of a physical register. enum RegState { - // A disabled register is not available for allocation, but an alias may - // be in use. A register can only be moved out of the disabled state if - // all aliases are disabled. + /// A disabled register is not available for allocation, but an alias may + /// be in use. A register can only be moved out of the disabled state if + /// all aliases are disabled. regDisabled, - // A free register is not currently in use and can be allocated - // immediately without checking aliases. + /// A free register is not currently in use and can be allocated + /// immediately without checking aliases. regFree, - // A reserved register has been assigned explicitly (e.g., setting up a - // call parameter), and it remains reserved until it is used. + /// A reserved register has been assigned explicitly (e.g., setting up a + /// call parameter), and it remains reserved until it is used. regReserved - // A register state may also be a virtual register number, indication that - // the physical register is currently allocated to a virtual register. In - // that case, LiveVirtRegs contains the inverse mapping. + /// A register state may also be a virtual register number, indication + /// that the physical register is currently allocated to a virtual + /// register. In that case, LiveVirtRegs contains the inverse mapping. }; - // PhysRegState - One of the RegState enums, or a virtreg. + /// One of the RegState enums, or a virtreg. std::vector<unsigned> PhysRegState; - // Set of register units. - typedef SparseSet<unsigned> UsedInInstrSet; + SmallVector<unsigned, 16> VirtDead; + SmallVector<MachineInstr *, 32> Coalesced; + + /// Set of register units. + using UsedInInstrSet = SparseSet<unsigned>; - // Set of register units that are used in the current instruction, and so - // cannot be allocated. + /// Set of register units that are used in the current instruction, and so + /// cannot be allocated. 
UsedInInstrSet UsedInInstr; - // Mark a physreg as used in this instruction. - void markRegUsedInInstr(unsigned PhysReg) { + /// Mark a physreg as used in this instruction. + void markRegUsedInInstr(MCPhysReg PhysReg) { for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) UsedInInstr.insert(*Units); } - // Check if a physreg or any of its aliases are used in this instruction. - bool isRegUsedInInstr(unsigned PhysReg) const { + /// Check if a physreg or any of its aliases are used in this instruction. + bool isRegUsedInInstr(MCPhysReg PhysReg) const { for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) if (UsedInInstr.count(*Units)) return true; return false; } - // SkippedInstrs - Descriptors of instructions whose clobber list was - // ignored because all registers were spilled. It is still necessary to - // mark all the clobbered registers as used by the function. - SmallPtrSet<const MCInstrDesc*, 4> SkippedInstrs; - - // isBulkSpilling - This flag is set when LiveRegMap will be cleared - // completely after spilling all live registers. LiveRegMap entries should - // not be erased. - bool isBulkSpilling; + /// This flag is set when LiveRegMap will be cleared completely after + /// spilling all live registers. LiveRegMap entries should not be erased. 
+ bool isBulkSpilling = false; enum : unsigned { spillClean = 1, spillDirty = 100, spillImpossible = ~0u }; + public: StringRef getPassName() const override { return "Fast Register Allocator"; } @@ -168,29 +180,32 @@ namespace { private: bool runOnMachineFunction(MachineFunction &Fn) override; - void AllocateBasicBlock(); - void handleThroughOperands(MachineInstr *MI, + void allocateBasicBlock(MachineBasicBlock &MBB); + void handleThroughOperands(MachineInstr &MI, SmallVectorImpl<unsigned> &VirtDead); - int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC); - bool isLastUseOfLocalReg(MachineOperand&); + int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass &RC); + bool isLastUseOfLocalReg(const MachineOperand &MO) const; - void addKillFlag(const LiveReg&); - void killVirtReg(LiveRegMap::iterator); + void addKillFlag(const LiveReg &LRI); + void killVirtReg(LiveRegMap::iterator LRI); void killVirtReg(unsigned VirtReg); void spillVirtReg(MachineBasicBlock::iterator MI, LiveRegMap::iterator); void spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg); - void usePhysReg(MachineOperand&); - void definePhysReg(MachineInstr &MI, unsigned PhysReg, RegState NewState); - unsigned calcSpillCost(unsigned PhysReg) const; - void assignVirtToPhysReg(LiveReg&, unsigned PhysReg); + void usePhysReg(MachineOperand &MO); + void definePhysReg(MachineInstr &MI, MCPhysReg PhysReg, RegState NewState); + unsigned calcSpillCost(MCPhysReg PhysReg) const; + void assignVirtToPhysReg(LiveReg&, MCPhysReg PhysReg); + LiveRegMap::iterator findLiveVirtReg(unsigned VirtReg) { return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg)); } + LiveRegMap::const_iterator findLiveVirtReg(unsigned VirtReg) const { return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg)); } - LiveRegMap::iterator assignVirtToPhysReg(unsigned VReg, unsigned PhysReg); + + LiveRegMap::iterator assignVirtToPhysReg(unsigned VReg, MCPhysReg PhysReg); LiveRegMap::iterator 
allocVirtReg(MachineInstr &MI, LiveRegMap::iterator, unsigned Hint); LiveRegMap::iterator defineVirtReg(MachineInstr &MI, unsigned OpNum, @@ -198,35 +213,41 @@ namespace { LiveRegMap::iterator reloadVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg, unsigned Hint); void spillAll(MachineBasicBlock::iterator MI); - bool setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg); + bool setPhysReg(MachineInstr &MI, unsigned OpNum, MCPhysReg PhysReg); + + void dumpState(); }; - char RAFast::ID = 0; -} -INITIALIZE_PASS(RAFast, "regallocfast", "Fast Register Allocator", false, false) +} // end anonymous namespace + +char RegAllocFast::ID = 0; + +INITIALIZE_PASS(RegAllocFast, "regallocfast", "Fast Register Allocator", false, + false) -/// getStackSpaceFor - This allocates space for the specified virtual register -/// to be held on the stack. -int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) { +/// This allocates space for the specified virtual register to be held on the +/// stack. +int RegAllocFast::getStackSpaceFor(unsigned VirtReg, + const TargetRegisterClass &RC) { // Find the location Reg would belong... int SS = StackSlotForVirtReg[VirtReg]; + // Already has space allocated? if (SS != -1) - return SS; // Already has space allocated? + return SS; // Allocate a new stack object for this spill location... - unsigned Size = TRI->getSpillSize(*RC); - unsigned Align = TRI->getSpillAlignment(*RC); - int FrameIdx = MF->getFrameInfo().CreateSpillStackObject(Size, Align); + unsigned Size = TRI->getSpillSize(RC); + unsigned Align = TRI->getSpillAlignment(RC); + int FrameIdx = MFI->CreateSpillStackObject(Size, Align); // Assign the slot. StackSlotForVirtReg[VirtReg] = FrameIdx; return FrameIdx; } -/// isLastUseOfLocalReg - Return true if MO is the only remaining reference to -/// its virtual register, and it is guaranteed to be a block-local register. 
-/// -bool RAFast::isLastUseOfLocalReg(MachineOperand &MO) { +/// Return true if MO is the only remaining reference to its virtual register, +/// and it is guaranteed to be a block-local register. +bool RegAllocFast::isLastUseOfLocalReg(const MachineOperand &MO) const { // If the register has ever been spilled or reloaded, we conservatively assume // it is a global register used in multiple blocks. if (StackSlotForVirtReg[MO.getReg()] != -1) @@ -239,8 +260,8 @@ bool RAFast::isLastUseOfLocalReg(MachineOperand &MO) { return ++I == MRI->reg_nodbg_end(); } -/// addKillFlag - Set kill flags on last use of a virtual register. -void RAFast::addKillFlag(const LiveReg &LR) { +/// Set kill flags on last use of a virtual register. +void RegAllocFast::addKillFlag(const LiveReg &LR) { if (!LR.LastUse) return; MachineOperand &MO = LR.LastUse->getOperand(LR.LastOpNum); if (MO.isUse() && !LR.LastUse->isRegTiedToDefOperand(LR.LastOpNum)) { @@ -250,7 +271,7 @@ void RAFast::addKillFlag(const LiveReg &LR) { // subreg of this register and given we don't track which // lanes are actually dead, we cannot insert a kill flag here. // Otherwise we may end up in a situation like this: - // ... = (MO) physreg:sub1, physreg <implicit-use, kill> + // ... = (MO) physreg:sub1, implicit killed physreg // ... <== Here we would allow later pass to reuse physreg:sub1 // which is potentially wrong. // LR:sub0 = ... @@ -258,8 +279,8 @@ void RAFast::addKillFlag(const LiveReg &LR) { } } -/// killVirtReg - Mark virtreg as no longer available. -void RAFast::killVirtReg(LiveRegMap::iterator LRI) { +/// Mark virtreg as no longer available. +void RegAllocFast::killVirtReg(LiveRegMap::iterator LRI) { addKillFlag(*LRI); assert(PhysRegState[LRI->PhysReg] == LRI->VirtReg && "Broken RegState mapping"); @@ -269,8 +290,8 @@ void RAFast::killVirtReg(LiveRegMap::iterator LRI) { LiveVirtRegs.erase(LRI); } -/// killVirtReg - Mark virtreg as no longer available. 
-void RAFast::killVirtReg(unsigned VirtReg) { +/// Mark virtreg as no longer available. +void RegAllocFast::killVirtReg(unsigned VirtReg) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "killVirtReg needs a virtual register"); LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); @@ -278,9 +299,10 @@ void RAFast::killVirtReg(unsigned VirtReg) { killVirtReg(LRI); } -/// spillVirtReg - This method spills the value specified by VirtReg into the -/// corresponding stack slot if needed. -void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) { +/// This method spills the value specified by VirtReg into the corresponding +/// stack slot if needed. +void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, + unsigned VirtReg) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Spilling a physical register is illegal!"); LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); @@ -288,9 +310,9 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) { spillVirtReg(MI, LRI); } -/// spillVirtReg - Do the actual work of spilling. -void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, - LiveRegMap::iterator LRI) { +/// Do the actual work of spilling. +void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, + LiveRegMap::iterator LRI) { LiveReg &LR = *LRI; assert(PhysRegState[LR.PhysReg] == LRI->VirtReg && "Broken RegState mapping"); @@ -299,12 +321,12 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, // instruction, not on the spill. 
bool SpillKill = MachineBasicBlock::iterator(LR.LastUse) != MI; LR.Dirty = false; - DEBUG(dbgs() << "Spilling " << PrintReg(LRI->VirtReg, TRI) - << " in " << PrintReg(LR.PhysReg, TRI)); - const TargetRegisterClass *RC = MRI->getRegClass(LRI->VirtReg); + DEBUG(dbgs() << "Spilling " << printReg(LRI->VirtReg, TRI) + << " in " << printReg(LR.PhysReg, TRI)); + const TargetRegisterClass &RC = *MRI->getRegClass(LRI->VirtReg); int FI = getStackSpaceFor(LRI->VirtReg, RC); DEBUG(dbgs() << " to stack slot #" << FI << "\n"); - TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, RC, TRI); + TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, &RC, TRI); ++NumStores; // Update statistics // If this register is used by DBG_VALUE then insert new DBG_VALUE to @@ -312,8 +334,7 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, // value. SmallVectorImpl<MachineInstr *> &LRIDbgValues = LiveDbgValueMap[LRI->VirtReg]; - for (unsigned li = 0, le = LRIDbgValues.size(); li != le; ++li) { - MachineInstr *DBG = LRIDbgValues[li]; + for (MachineInstr *DBG : LRIDbgValues) { MachineInstr *NewDV = buildDbgValueForSpill(*MBB, MI, *DBG, FI); assert(NewDV->getParent() == MBB && "dangling parent pointer"); (void)NewDV; @@ -329,32 +350,31 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, killVirtReg(LRI); } -/// spillAll - Spill all dirty virtregs without killing them. -void RAFast::spillAll(MachineBasicBlock::iterator MI) { +/// Spill all dirty virtregs without killing them. +void RegAllocFast::spillAll(MachineBasicBlock::iterator MI) { if (LiveVirtRegs.empty()) return; isBulkSpilling = true; // The LiveRegMap is keyed by an unsigned (the virtreg number), so the order // of spilling here is deterministic, if arbitrary. 
- for (LiveRegMap::iterator i = LiveVirtRegs.begin(), e = LiveVirtRegs.end(); - i != e; ++i) - spillVirtReg(MI, i); + for (LiveRegMap::iterator I = LiveVirtRegs.begin(), E = LiveVirtRegs.end(); + I != E; ++I) + spillVirtReg(MI, I); LiveVirtRegs.clear(); isBulkSpilling = false; } -/// usePhysReg - Handle the direct use of a physical register. -/// Check that the register is not used by a virtreg. -/// Kill the physreg, marking it free. -/// This may add implicit kills to MO->getParent() and invalidate MO. -void RAFast::usePhysReg(MachineOperand &MO) { - unsigned PhysReg = MO.getReg(); - assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && - "Bad usePhysReg operand"); - +/// Handle the direct use of a physical register. Check that the register is +/// not used by a virtreg. Kill the physreg, marking it free. This may add +/// implicit kills to MO->getParent() and invalidate MO. +void RegAllocFast::usePhysReg(MachineOperand &MO) { // Ignore undef uses. if (MO.isUndef()) return; + unsigned PhysReg = MO.getReg(); + assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && + "Bad usePhysReg operand"); + markRegUsedInInstr(PhysReg); switch (PhysRegState[PhysReg]) { case regDisabled: @@ -373,7 +393,7 @@ void RAFast::usePhysReg(MachineOperand &MO) { // Maybe a superregister is reserved? for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { - unsigned Alias = *AI; + MCPhysReg Alias = *AI; switch (PhysRegState[Alias]) { case regDisabled: break; @@ -411,11 +431,11 @@ void RAFast::usePhysReg(MachineOperand &MO) { MO.setIsKill(); } -/// definePhysReg - Mark PhysReg as reserved or free after spilling any -/// virtregs. This is very similar to defineVirtReg except the physreg is -/// reserved instead of allocated. -void RAFast::definePhysReg(MachineInstr &MI, unsigned PhysReg, - RegState NewState) { +/// Mark PhysReg as reserved or free after spilling any virtregs. 
This is very +/// similar to defineVirtReg except the physreg is reserved instead of +/// allocated. +void RegAllocFast::definePhysReg(MachineInstr &MI, MCPhysReg PhysReg, + RegState NewState) { markRegUsedInInstr(PhysReg); switch (unsigned VirtReg = PhysRegState[PhysReg]) { case regDisabled: @@ -432,7 +452,7 @@ void RAFast::definePhysReg(MachineInstr &MI, unsigned PhysReg, // This is a disabled register, disable all aliases. PhysRegState[PhysReg] = NewState; for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { - unsigned Alias = *AI; + MCPhysReg Alias = *AI; switch (unsigned VirtReg = PhysRegState[Alias]) { case regDisabled: break; @@ -449,15 +469,13 @@ void RAFast::definePhysReg(MachineInstr &MI, unsigned PhysReg, } } - -// calcSpillCost - Return the cost of spilling clearing out PhysReg and -// aliases so it is free for allocation. -// Returns 0 when PhysReg is free or disabled with all aliases disabled - it -// can be allocated directly. -// Returns spillImpossible when PhysReg or an alias can't be spilled. -unsigned RAFast::calcSpillCost(unsigned PhysReg) const { +/// \brief Return the cost of spilling clearing out PhysReg and aliases so it is +/// free for allocation. Returns 0 when PhysReg is free or disabled with all +/// aliases disabled - it can be allocated directly. +/// \returns spillImpossible when PhysReg or an alias can't be spilled. 
+unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const { if (isRegUsedInInstr(PhysReg)) { - DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is already used in instr.\n"); + DEBUG(dbgs() << printReg(PhysReg, TRI) << " is already used in instr.\n"); return spillImpossible; } switch (unsigned VirtReg = PhysRegState[PhysReg]) { @@ -466,8 +484,8 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const { case regFree: return 0; case regReserved: - DEBUG(dbgs() << PrintReg(VirtReg, TRI) << " corresponding " - << PrintReg(PhysReg, TRI) << " is reserved already.\n"); + DEBUG(dbgs() << printReg(VirtReg, TRI) << " corresponding " + << printReg(PhysReg, TRI) << " is reserved already.\n"); return spillImpossible; default: { LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg); @@ -477,10 +495,10 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const { } // This is a disabled register, add up cost of aliases. - DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is disabled.\n"); + DEBUG(dbgs() << printReg(PhysReg, TRI) << " is disabled.\n"); unsigned Cost = 0; for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { - unsigned Alias = *AI; + MCPhysReg Alias = *AI; switch (unsigned VirtReg = PhysRegState[Alias]) { case regDisabled: break; @@ -500,45 +518,37 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const { return Cost; } - -/// assignVirtToPhysReg - This method updates local state so that we know -/// that PhysReg is the proper container for VirtReg now. The physical -/// register must not be used for anything else when this is called. -/// -void RAFast::assignVirtToPhysReg(LiveReg &LR, unsigned PhysReg) { - DEBUG(dbgs() << "Assigning " << PrintReg(LR.VirtReg, TRI) << " to " - << PrintReg(PhysReg, TRI) << "\n"); +/// \brief This method updates local state so that we know that PhysReg is the +/// proper container for VirtReg now. The physical register must not be used +/// for anything else when this is called. 
+void RegAllocFast::assignVirtToPhysReg(LiveReg &LR, MCPhysReg PhysReg) { + DEBUG(dbgs() << "Assigning " << printReg(LR.VirtReg, TRI) << " to " + << printReg(PhysReg, TRI) << "\n"); PhysRegState[PhysReg] = LR.VirtReg; assert(!LR.PhysReg && "Already assigned a physreg"); LR.PhysReg = PhysReg; } -RAFast::LiveRegMap::iterator -RAFast::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) { +RegAllocFast::LiveRegMap::iterator +RegAllocFast::assignVirtToPhysReg(unsigned VirtReg, MCPhysReg PhysReg) { LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); assert(LRI != LiveVirtRegs.end() && "VirtReg disappeared"); assignVirtToPhysReg(*LRI, PhysReg); return LRI; } -/// allocVirtReg - Allocate a physical register for VirtReg. -RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr &MI, - LiveRegMap::iterator LRI, - unsigned Hint) { +/// Allocates a physical register for VirtReg. +RegAllocFast::LiveRegMap::iterator RegAllocFast::allocVirtReg(MachineInstr &MI, + LiveRegMap::iterator LRI, unsigned Hint) { const unsigned VirtReg = LRI->VirtReg; assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Can only allocate virtual registers"); - const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); - - // Ignore invalid hints. - if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) || - !RC->contains(Hint) || !MRI->isAllocatable(Hint))) - Hint = 0; - // Take hint when possible. - if (Hint) { + const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); + if (TargetRegisterInfo::isPhysicalRegister(Hint) && + MRI->isAllocatable(Hint) && RC.contains(Hint)) { // Ignore the hint if we would have to spill a dirty register. unsigned Cost = calcSpillCost(Hint); if (Cost < spillDirty) { @@ -550,33 +560,32 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr &MI, } } - ArrayRef<MCPhysReg> AO = RegClassInfo.getOrder(RC); - // First try to find a completely free register. 
- for (ArrayRef<MCPhysReg>::iterator I = AO.begin(), E = AO.end(); I != E; ++I){ - unsigned PhysReg = *I; + ArrayRef<MCPhysReg> AO = RegClassInfo.getOrder(&RC); + for (MCPhysReg PhysReg : AO) { if (PhysRegState[PhysReg] == regFree && !isRegUsedInInstr(PhysReg)) { assignVirtToPhysReg(*LRI, PhysReg); return LRI; } } - DEBUG(dbgs() << "Allocating " << PrintReg(VirtReg) << " from " - << TRI->getRegClassName(RC) << "\n"); + DEBUG(dbgs() << "Allocating " << printReg(VirtReg) << " from " + << TRI->getRegClassName(&RC) << "\n"); - unsigned BestReg = 0, BestCost = spillImpossible; - for (ArrayRef<MCPhysReg>::iterator I = AO.begin(), E = AO.end(); I != E; ++I){ - unsigned Cost = calcSpillCost(*I); - DEBUG(dbgs() << "\tRegister: " << PrintReg(*I, TRI) << "\n"); + unsigned BestReg = 0; + unsigned BestCost = spillImpossible; + for (MCPhysReg PhysReg : AO) { + unsigned Cost = calcSpillCost(PhysReg); + DEBUG(dbgs() << "\tRegister: " << printReg(PhysReg, TRI) << "\n"); DEBUG(dbgs() << "\tCost: " << Cost << "\n"); DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n"); // Cost is 0 when all aliases are already disabled. if (Cost == 0) { - assignVirtToPhysReg(*LRI, *I); + assignVirtToPhysReg(*LRI, PhysReg); return LRI; } if (Cost < BestCost) - BestReg = *I, BestCost = Cost; + BestReg = PhysReg, BestCost = Cost; } if (BestReg) { @@ -595,11 +604,11 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr &MI, return assignVirtToPhysReg(VirtReg, *AO.begin()); } -/// defineVirtReg - Allocate a register for VirtReg and mark it as dirty. -RAFast::LiveRegMap::iterator RAFast::defineVirtReg(MachineInstr &MI, - unsigned OpNum, - unsigned VirtReg, - unsigned Hint) { +/// Allocates a register for VirtReg and mark it as dirty. 
+RegAllocFast::LiveRegMap::iterator RegAllocFast::defineVirtReg(MachineInstr &MI, + unsigned OpNum, + unsigned VirtReg, + unsigned Hint) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Not a virtual register"); LiveRegMap::iterator LRI; @@ -629,11 +638,11 @@ RAFast::LiveRegMap::iterator RAFast::defineVirtReg(MachineInstr &MI, return LRI; } -/// reloadVirtReg - Make sure VirtReg is available in a physreg and return it. -RAFast::LiveRegMap::iterator RAFast::reloadVirtReg(MachineInstr &MI, - unsigned OpNum, - unsigned VirtReg, - unsigned Hint) { +/// Make sure VirtReg is available in a physreg and return it. +RegAllocFast::LiveRegMap::iterator RegAllocFast::reloadVirtReg(MachineInstr &MI, + unsigned OpNum, + unsigned VirtReg, + unsigned Hint) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Not a virtual register"); LiveRegMap::iterator LRI; @@ -642,11 +651,11 @@ RAFast::LiveRegMap::iterator RAFast::reloadVirtReg(MachineInstr &MI, MachineOperand &MO = MI.getOperand(OpNum); if (New) { LRI = allocVirtReg(MI, LRI, Hint); - const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); + const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); int FrameIndex = getStackSpaceFor(VirtReg, RC); - DEBUG(dbgs() << "Reloading " << PrintReg(VirtReg, TRI) << " into " - << PrintReg(LRI->PhysReg, TRI) << "\n"); - TII->loadRegFromStackSlot(*MBB, MI, LRI->PhysReg, FrameIndex, RC, TRI); + DEBUG(dbgs() << "Reloading " << printReg(VirtReg, TRI) << " into " + << printReg(LRI->PhysReg, TRI) << "\n"); + TII->loadRegFromStackSlot(*MBB, MI, LRI->PhysReg, FrameIndex, &RC, TRI); ++NumLoads; } else if (LRI->Dirty) { if (isLastUseOfLocalReg(MO)) { @@ -665,7 +674,7 @@ RAFast::LiveRegMap::iterator RAFast::reloadVirtReg(MachineInstr &MI, } else if (MO.isKill()) { // We must remove kill flags from uses of reloaded registers because the // register would be killed immediately, and there might be a second use: - // %foo = OR %x<kill>, %x + // %foo = OR killed %x, %x // This would 
cause a second reload of %x into a different register. DEBUG(dbgs() << "Clearing clean kill: " << MO << "\n"); MO.setIsKill(false); @@ -680,241 +689,236 @@ RAFast::LiveRegMap::iterator RAFast::reloadVirtReg(MachineInstr &MI, return LRI; } -// setPhysReg - Change operand OpNum in MI the refer the PhysReg, considering -// subregs. This may invalidate any operand pointers. -// Return true if the operand kills its register. -bool RAFast::setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg) { - MachineOperand &MO = MI->getOperand(OpNum); +/// Changes operand OpNum in MI the refer the PhysReg, considering subregs. This +/// may invalidate any operand pointers. Return true if the operand kills its +/// register. +bool RegAllocFast::setPhysReg(MachineInstr &MI, unsigned OpNum, + MCPhysReg PhysReg) { + MachineOperand &MO = MI.getOperand(OpNum); bool Dead = MO.isDead(); if (!MO.getSubReg()) { MO.setReg(PhysReg); + MO.setIsRenamableIfNoExtraRegAllocReq(); return MO.isKill() || Dead; } // Handle subregister index. MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : 0); + MO.setIsRenamableIfNoExtraRegAllocReq(); MO.setSubReg(0); // A kill flag implies killing the full register. Add corresponding super // register kill. if (MO.isKill()) { - MI->addRegisterKilled(PhysReg, TRI, true); + MI.addRegisterKilled(PhysReg, TRI, true); return true; } // A <def,read-undef> of a sub-register requires an implicit def of the full // register. if (MO.isDef() && MO.isUndef()) - MI->addRegisterDefined(PhysReg, TRI); + MI.addRegisterDefined(PhysReg, TRI); return Dead; } -// Handle special instruction operand like early clobbers and tied ops when +// Handles special instruction operand like early clobbers and tied ops when // there are additional physreg defines. 
-void RAFast::handleThroughOperands(MachineInstr *MI, - SmallVectorImpl<unsigned> &VirtDead) { +void RegAllocFast::handleThroughOperands(MachineInstr &MI, + SmallVectorImpl<unsigned> &VirtDead) { DEBUG(dbgs() << "Scanning for through registers:"); SmallSet<unsigned, 8> ThroughRegs; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - if (MO.isEarlyClobber() || MI->isRegTiedToDefOperand(i) || - (MO.getSubReg() && MI->readsVirtualRegister(Reg))) { + if (MO.isEarlyClobber() || (MO.isUse() && MO.isTied()) || + (MO.getSubReg() && MI.readsVirtualRegister(Reg))) { if (ThroughRegs.insert(Reg).second) - DEBUG(dbgs() << ' ' << PrintReg(Reg)); + DEBUG(dbgs() << ' ' << printReg(Reg)); } } // If any physreg defines collide with preallocated through registers, // we must spill and reallocate. 
DEBUG(dbgs() << "\nChecking for physdef collisions.\n"); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.isDef()) continue; unsigned Reg = MO.getReg(); if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; markRegUsedInInstr(Reg); for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { if (ThroughRegs.count(PhysRegState[*AI])) - definePhysReg(*MI, *AI, regFree); + definePhysReg(MI, *AI, regFree); } } SmallVector<unsigned, 8> PartialDefs; DEBUG(dbgs() << "Allocating tied uses.\n"); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { + const MachineOperand &MO = MI.getOperand(I); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; if (MO.isUse()) { - unsigned DefIdx = 0; - if (!MI->isRegTiedToDefOperand(i, &DefIdx)) continue; - DEBUG(dbgs() << "Operand " << i << "("<< MO << ") is tied to operand " - << DefIdx << ".\n"); - LiveRegMap::iterator LRI = reloadVirtReg(*MI, i, Reg, 0); - unsigned PhysReg = LRI->PhysReg; - setPhysReg(MI, i, PhysReg); + if (!MO.isTied()) continue; + DEBUG(dbgs() << "Operand " << I << "("<< MO << ") is tied to operand " + << MI.findTiedOperandIdx(I) << ".\n"); + LiveRegMap::iterator LRI = reloadVirtReg(MI, I, Reg, 0); + MCPhysReg PhysReg = LRI->PhysReg; + setPhysReg(MI, I, PhysReg); // Note: we don't update the def operand yet. That would cause the normal // def-scan to attempt spilling. - } else if (MO.getSubReg() && MI->readsVirtualRegister(Reg)) { + } else if (MO.getSubReg() && MI.readsVirtualRegister(Reg)) { DEBUG(dbgs() << "Partial redefine: " << MO << "\n"); // Reload the register, but don't assign to the operand just yet. // That would confuse the later phys-def processing pass. 
- LiveRegMap::iterator LRI = reloadVirtReg(*MI, i, Reg, 0); + LiveRegMap::iterator LRI = reloadVirtReg(MI, I, Reg, 0); PartialDefs.push_back(LRI->PhysReg); } } DEBUG(dbgs() << "Allocating early clobbers.\n"); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { + const MachineOperand &MO = MI.getOperand(I); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; if (!MO.isEarlyClobber()) continue; // Note: defineVirtReg may invalidate MO. - LiveRegMap::iterator LRI = defineVirtReg(*MI, i, Reg, 0); - unsigned PhysReg = LRI->PhysReg; - if (setPhysReg(MI, i, PhysReg)) + LiveRegMap::iterator LRI = defineVirtReg(MI, I, Reg, 0); + MCPhysReg PhysReg = LRI->PhysReg; + if (setPhysReg(MI, I, PhysReg)) VirtDead.push_back(Reg); } // Restore UsedInInstr to a state usable for allocating normal virtual uses. UsedInInstr.clear(); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue; unsigned Reg = MO.getReg(); if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; - DEBUG(dbgs() << "\tSetting " << PrintReg(Reg, TRI) + DEBUG(dbgs() << "\tSetting " << printReg(Reg, TRI) << " as used in instr\n"); markRegUsedInInstr(Reg); } // Also mark PartialDefs as used to avoid reallocation. 
- for (unsigned i = 0, e = PartialDefs.size(); i != e; ++i) - markRegUsedInInstr(PartialDefs[i]); + for (unsigned PartialDef : PartialDefs) + markRegUsedInInstr(PartialDef); +} + +#ifndef NDEBUG +void RegAllocFast::dumpState() { + for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) { + if (PhysRegState[Reg] == regDisabled) continue; + dbgs() << " " << printReg(Reg, TRI); + switch(PhysRegState[Reg]) { + case regFree: + break; + case regReserved: + dbgs() << "*"; + break; + default: { + dbgs() << '=' << printReg(PhysRegState[Reg]); + LiveRegMap::iterator I = findLiveVirtReg(PhysRegState[Reg]); + assert(I != LiveVirtRegs.end() && "Missing VirtReg entry"); + if (I->Dirty) + dbgs() << "*"; + assert(I->PhysReg == Reg && "Bad inverse map"); + break; + } + } + } + dbgs() << '\n'; + // Check that LiveVirtRegs is the inverse. + for (LiveRegMap::iterator i = LiveVirtRegs.begin(), + e = LiveVirtRegs.end(); i != e; ++i) { + assert(TargetRegisterInfo::isVirtualRegister(i->VirtReg) && + "Bad map key"); + assert(TargetRegisterInfo::isPhysicalRegister(i->PhysReg) && + "Bad map value"); + assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map"); + } } +#endif -void RAFast::AllocateBasicBlock() { - DEBUG(dbgs() << "\nAllocating " << *MBB); +void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { + this->MBB = &MBB; + DEBUG(dbgs() << "\nAllocating " << MBB); PhysRegState.assign(TRI->getNumRegs(), regDisabled); assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?"); - MachineBasicBlock::iterator MII = MBB->begin(); + MachineBasicBlock::iterator MII = MBB.begin(); // Add live-in registers as live. 
- for (const auto &LI : MBB->liveins()) + for (const MachineBasicBlock::RegisterMaskPair LI : MBB.liveins()) if (MRI->isAllocatable(LI.PhysReg)) definePhysReg(*MII, LI.PhysReg, regReserved); - SmallVector<unsigned, 8> VirtDead; - SmallVector<MachineInstr*, 32> Coalesced; + VirtDead.clear(); + Coalesced.clear(); // Otherwise, sequentially allocate each instruction in the MBB. - while (MII != MBB->end()) { - MachineInstr *MI = &*MII++; - const MCInstrDesc &MCID = MI->getDesc(); - DEBUG({ - dbgs() << "\n>> " << *MI << "Regs:"; - for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) { - if (PhysRegState[Reg] == regDisabled) continue; - dbgs() << " " << TRI->getName(Reg); - switch(PhysRegState[Reg]) { - case regFree: - break; - case regReserved: - dbgs() << "*"; - break; - default: { - dbgs() << '=' << PrintReg(PhysRegState[Reg]); - LiveRegMap::iterator I = findLiveVirtReg(PhysRegState[Reg]); - assert(I != LiveVirtRegs.end() && "Missing VirtReg entry"); - if (I->Dirty) - dbgs() << "*"; - assert(I->PhysReg == Reg && "Bad inverse map"); - break; - } - } - } - dbgs() << '\n'; - // Check that LiveVirtRegs is the inverse. - for (LiveRegMap::iterator i = LiveVirtRegs.begin(), - e = LiveVirtRegs.end(); i != e; ++i) { - assert(TargetRegisterInfo::isVirtualRegister(i->VirtReg) && - "Bad map key"); - assert(TargetRegisterInfo::isPhysicalRegister(i->PhysReg) && - "Bad map value"); - assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map"); - } - }); + for (MachineInstr &MI : MBB) { + const MCInstrDesc &MCID = MI.getDesc(); + DEBUG( + dbgs() << "\n>> " << MI << "Regs:"; + dumpState() + ); // Debug values are not allowed to change codegen in any way. 
- if (MI->isDebugValue()) { - bool ScanDbgValue = true; - while (ScanDbgValue) { - ScanDbgValue = false; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - LiveRegMap::iterator LRI = findLiveVirtReg(Reg); - if (LRI != LiveVirtRegs.end()) - setPhysReg(MI, i, LRI->PhysReg); - else { - int SS = StackSlotForVirtReg[Reg]; - if (SS == -1) { - // We can't allocate a physreg for a DebugValue, sorry! - DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE"); - MO.setReg(0); - } - else { - // Modify DBG_VALUE now that the value is in a spill slot. - bool IsIndirect = MI->isIndirectDebugValue(); - uint64_t Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; - const MDNode *Var = MI->getDebugVariable(); - const MDNode *Expr = MI->getDebugExpression(); - DebugLoc DL = MI->getDebugLoc(); - MachineBasicBlock *MBB = MI->getParent(); - assert( - cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && - "Expected inlined-at fields to agree"); - MachineInstr *NewDV = BuildMI(*MBB, MBB->erase(MI), DL, - TII->get(TargetOpcode::DBG_VALUE)) - .addFrameIndex(SS) - .addImm(Offset) - .addMetadata(Var) - .addMetadata(Expr); - DEBUG(dbgs() << "Modifying debug info due to spill:" - << "\t" << *NewDV); - // Scan NewDV operands from the beginning. - MI = NewDV; - ScanDbgValue = true; - break; - } - } - LiveDbgValueMap[Reg].push_back(MI); + if (MI.isDebugValue()) { + MachineInstr *DebugMI = &MI; + MachineOperand &MO = DebugMI->getOperand(0); + + // Ignore DBG_VALUEs that aren't based on virtual registers. These are + // mostly constants and frame indices. + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + + // See if this virtual register has already been allocated to a physical + // register or spilled to a stack slot. 
+ LiveRegMap::iterator LRI = findLiveVirtReg(Reg); + if (LRI != LiveVirtRegs.end()) + setPhysReg(*DebugMI, 0, LRI->PhysReg); + else { + int SS = StackSlotForVirtReg[Reg]; + if (SS != -1) { + // Modify DBG_VALUE now that the value is in a spill slot. + updateDbgValueForSpill(*DebugMI, SS); + DEBUG(dbgs() << "Modifying debug info due to spill:" + << "\t" << *DebugMI); + continue; } + + // We can't allocate a physreg for a DebugValue, sorry! + DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE"); + MO.setReg(0); } - // Next instruction. + + // If Reg hasn't been spilled, put this DBG_VALUE in LiveDbgValueMap so + // that future spills of Reg will have DBG_VALUEs. + LiveDbgValueMap[Reg].push_back(DebugMI); continue; } // If this is a copy, we may be able to coalesce. - unsigned CopySrc = 0, CopyDst = 0, CopySrcSub = 0, CopyDstSub = 0; - if (MI->isCopy()) { - CopyDst = MI->getOperand(0).getReg(); - CopySrc = MI->getOperand(1).getReg(); - CopyDstSub = MI->getOperand(0).getSubReg(); - CopySrcSub = MI->getOperand(1).getSubReg(); + unsigned CopySrcReg = 0; + unsigned CopyDstReg = 0; + unsigned CopySrcSub = 0; + unsigned CopyDstSub = 0; + if (MI.isCopy()) { + CopyDstReg = MI.getOperand(0).getReg(); + CopySrcReg = MI.getOperand(1).getReg(); + CopyDstSub = MI.getOperand(0).getSubReg(); + CopySrcSub = MI.getOperand(1).getSubReg(); } // Track registers used by instruction. @@ -928,8 +932,8 @@ void RAFast::AllocateBasicBlock() { bool hasEarlyClobbers = false; bool hasPartialRedefs = false; bool hasPhysDefs = false; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); // Make sure MRI knows about registers clobbered by regmasks. 
if (MO.isRegMask()) { MRI->addPhysRegsUsedFromRegMask(MO.getRegMask()); @@ -946,7 +950,7 @@ void RAFast::AllocateBasicBlock() { } else { if (MO.isEarlyClobber()) hasEarlyClobbers = true; - if (MO.getSubReg() && MI->readsVirtualRegister(Reg)) + if (MO.getSubReg() && MI.readsVirtualRegister(Reg)) hasPartialRedefs = true; } continue; @@ -955,7 +959,7 @@ void RAFast::AllocateBasicBlock() { if (MO.isUse()) { usePhysReg(MO); } else if (MO.isEarlyClobber()) { - definePhysReg(*MI, Reg, + definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ? regFree : regReserved); hasEarlyClobbers = true; } else @@ -971,11 +975,11 @@ void RAFast::AllocateBasicBlock() { // sure the same register is allocated to uses and defs. // We didn't detect inline asm tied operands above, so just make this extra // pass for all inline asm. - if (MI->isInlineAsm() || hasEarlyClobbers || hasPartialRedefs || + if (MI.isInlineAsm() || hasEarlyClobbers || hasPartialRedefs || (hasTiedOps && (hasPhysDefs || MCID.getNumDefs() > 1))) { handleThroughOperands(MI, VirtDead); // Don't attempt coalescing when we have funny stuff going on. - CopyDst = 0; + CopyDstReg = 0; // Pretend we have early clobbers so the use operands get marked below. // This is not necessary for the common case of a single tied use. hasEarlyClobbers = true; @@ -983,16 +987,16 @@ void RAFast::AllocateBasicBlock() { // Second scan. // Allocate virtreg uses. - for (unsigned i = 0; i != VirtOpEnd; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (unsigned I = 0; I != VirtOpEnd; ++I) { + const MachineOperand &MO = MI.getOperand(I); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; if (MO.isUse()) { - LiveRegMap::iterator LRI = reloadVirtReg(*MI, i, Reg, CopyDst); - unsigned PhysReg = LRI->PhysReg; - CopySrc = (CopySrc == Reg || CopySrc == PhysReg) ? 
PhysReg : 0; - if (setPhysReg(MI, i, PhysReg)) + LiveRegMap::iterator LRI = reloadVirtReg(MI, I, Reg, CopyDstReg); + MCPhysReg PhysReg = LRI->PhysReg; + CopySrcReg = (CopySrcReg == Reg || CopySrcReg == PhysReg) ? PhysReg : 0; + if (setPhysReg(MI, I, PhysReg)) killVirtReg(LRI); } } @@ -1001,19 +1005,18 @@ void RAFast::AllocateBasicBlock() { // this point. UsedInInstr.clear(); if (hasEarlyClobbers) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; // Look for physreg defs and tied uses. - if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue; + if (!MO.isDef() && !MO.isTied()) continue; markRegUsedInInstr(Reg); } } - unsigned DefOpEnd = MI->getNumOperands(); - if (MI->isCall()) { + unsigned DefOpEnd = MI.getNumOperands(); + if (MI.isCall()) { // Spill all virtregs before a call. This serves one purpose: If an // exception is thrown, the landing pad is going to expect to find // registers in their spill slots. @@ -1023,99 +1026,92 @@ void RAFast::AllocateBasicBlock() { // those for virtual registers in between. DEBUG(dbgs() << " Spilling remaining registers before call.\n"); spillAll(MI); - - // The imp-defs are skipped below, but we still need to mark those - // registers as used by the function. - SkippedInstrs.insert(&MCID); } // Third scan. // Allocate defs and collect dead defs. - for (unsigned i = 0; i != DefOpEnd; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (unsigned I = 0; I != DefOpEnd; ++I) { + const MachineOperand &MO = MI.getOperand(I); if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber()) continue; unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { if (!MRI->isAllocatable(Reg)) continue; - definePhysReg(*MI, Reg, MO.isDead() ? 
regFree : regReserved); + definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved); continue; } - LiveRegMap::iterator LRI = defineVirtReg(*MI, i, Reg, CopySrc); - unsigned PhysReg = LRI->PhysReg; - if (setPhysReg(MI, i, PhysReg)) { + LiveRegMap::iterator LRI = defineVirtReg(MI, I, Reg, CopySrcReg); + MCPhysReg PhysReg = LRI->PhysReg; + if (setPhysReg(MI, I, PhysReg)) { VirtDead.push_back(Reg); - CopyDst = 0; // cancel coalescing; + CopyDstReg = 0; // cancel coalescing; } else - CopyDst = (CopyDst == Reg || CopyDst == PhysReg) ? PhysReg : 0; + CopyDstReg = (CopyDstReg == Reg || CopyDstReg == PhysReg) ? PhysReg : 0; } // Kill dead defs after the scan to ensure that multiple defs of the same // register are allocated identically. We didn't need to do this for uses // because we are crerating our own kill flags, and they are always at the // last use. - for (unsigned i = 0, e = VirtDead.size(); i != e; ++i) - killVirtReg(VirtDead[i]); + for (unsigned VirtReg : VirtDead) + killVirtReg(VirtReg); VirtDead.clear(); - if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) { - DEBUG(dbgs() << "-- coalescing: " << *MI); - Coalesced.push_back(MI); + if (CopyDstReg && CopyDstReg == CopySrcReg && CopyDstSub == CopySrcSub) { + DEBUG(dbgs() << "-- coalescing: " << MI); + Coalesced.push_back(&MI); } else { - DEBUG(dbgs() << "<< " << *MI); + DEBUG(dbgs() << "<< " << MI); } } // Spill all physical registers holding virtual registers now. DEBUG(dbgs() << "Spilling live registers at end of block.\n"); - spillAll(MBB->getFirstTerminator()); + spillAll(MBB.getFirstTerminator()); // Erase all the coalesced copies. We are delaying it until now because // LiveVirtRegs might refer to the instrs. 
- for (unsigned i = 0, e = Coalesced.size(); i != e; ++i) - MBB->erase(Coalesced[i]); + for (MachineInstr *MI : Coalesced) + MBB.erase(MI); NumCopies += Coalesced.size(); - DEBUG(MBB->dump()); + DEBUG(MBB.dump()); } -/// runOnMachineFunction - Register allocate the whole function -/// -bool RAFast::runOnMachineFunction(MachineFunction &Fn) { +/// Allocates registers for a function. +bool RegAllocFast::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** FAST REGISTER ALLOCATION **********\n" - << "********** Function: " << Fn.getName() << '\n'); - MF = &Fn; - MRI = &MF->getRegInfo(); - TRI = MF->getSubtarget().getRegisterInfo(); - TII = MF->getSubtarget().getInstrInfo(); - MRI->freezeReservedRegs(Fn); - RegClassInfo.runOnMachineFunction(Fn); + << "********** Function: " << MF.getName() << '\n'); + MRI = &MF.getRegInfo(); + const TargetSubtargetInfo &STI = MF.getSubtarget(); + TRI = STI.getRegisterInfo(); + TII = STI.getInstrInfo(); + MFI = &MF.getFrameInfo(); + MRI->freezeReservedRegs(MF); + RegClassInfo.runOnMachineFunction(MF); UsedInInstr.clear(); UsedInInstr.setUniverse(TRI->getNumRegUnits()); // initialize the virtual->physical register map to have a 'null' // mapping for all virtual registers - StackSlotForVirtReg.resize(MRI->getNumVirtRegs()); - LiveVirtRegs.setUniverse(MRI->getNumVirtRegs()); + unsigned NumVirtRegs = MRI->getNumVirtRegs(); + StackSlotForVirtReg.resize(NumVirtRegs); + LiveVirtRegs.setUniverse(NumVirtRegs); // Loop over all of the basic blocks, eliminating virtual register references - for (MachineFunction::iterator MBBi = Fn.begin(), MBBe = Fn.end(); - MBBi != MBBe; ++MBBi) { - MBB = &*MBBi; - AllocateBasicBlock(); - } + for (MachineBasicBlock &MBB : MF) + allocateBasicBlock(MBB); // All machine operands and other references to virtual registers have been // replaced. Remove the virtual registers. 
MRI->clearVirtRegs(); - SkippedInstrs.clear(); StackSlotForVirtReg.clear(); LiveDbgValueMap.clear(); return true; } FunctionPass *llvm::createFastRegisterAllocator() { - return new RAFast(); + return new RegAllocFast(); } diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 020e81eca2dd..186ef577e31d 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -23,6 +23,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -30,12 +31,12 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/EdgeBundles.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveIntervalUnion.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveRegMatrix.h" #include "llvm/CodeGen/LiveStackAnalysis.h" @@ -53,6 +54,9 @@ #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" @@ -65,10 +69,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cassert> #include 
<cstdint> @@ -104,10 +105,11 @@ static cl::opt<unsigned> LastChanceRecoloringMaxInterference( " interference at a time"), cl::init(8)); -static cl::opt<bool> -ExhaustiveSearch("exhaustive-register-search", cl::NotHidden, - cl::desc("Exhaustive Search for registers bypassing the depth " - "and interference cutoffs of last chance recoloring")); +static cl::opt<bool> ExhaustiveSearch( + "exhaustive-register-search", cl::NotHidden, + cl::desc("Exhaustive Search for registers bypassing the depth " + "and interference cutoffs of last chance recoloring"), + cl::Hidden); static cl::opt<bool> EnableLocalReassignment( "enable-local-reassign", cl::Hidden, @@ -129,6 +131,12 @@ CSRFirstTimeCost("regalloc-csr-first-time-cost", cl::desc("Cost for first time use of callee-saved register."), cl::init(0), cl::Hidden); +static cl::opt<bool> ConsiderLocalIntervalCost( + "condsider-local-interval-cost", cl::Hidden, + cl::desc("Consider the cost of local intervals created by a split " + "candidate when choosing the best split candidate."), + cl::init(false)); + static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", createGreedyRegisterAllocator); @@ -277,6 +285,57 @@ class RAGreedy : public MachineFunctionPass, } }; + /// EvictionTrack - Keeps track of past evictions in order to optimize region + /// split decision. + class EvictionTrack { + + public: + using EvictorInfo = + std::pair<unsigned /* evictor */, unsigned /* physreg */>; + using EvicteeInfo = llvm::MapVector<unsigned /* evictee */, EvictorInfo>; + + private: + /// Each Vreg that has been evicted in the last stage of selectOrSplit will + /// be mapped to the evictor Vreg and the PhysReg it was evicted from. + EvicteeInfo Evictees; + + public: + /// \brief Clear all eviction information. + void clear() { Evictees.clear(); } + + /// \brief Clear eviction information for the given evictee Vreg. + /// E.g. when Vreg get's a new allocation, the old eviction info is no + /// longer relevant. 
+ /// \param Evictee The evictee Vreg for whom we want to clear collected + /// eviction info. + void clearEvicteeInfo(unsigned Evictee) { Evictees.erase(Evictee); } + + /// \brief Track new eviction. + /// The Evictor vreg has evicted the Evictee vreg from Physreg. + /// \praram PhysReg The phisical register Evictee was evicted from. + /// \praram Evictor The evictor Vreg that evicted Evictee. + /// \praram Evictee The evictee Vreg. + void addEviction(unsigned PhysReg, unsigned Evictor, unsigned Evictee) { + Evictees[Evictee].first = Evictor; + Evictees[Evictee].second = PhysReg; + } + + /// Return the Evictor Vreg which evicted Evictee Vreg from PhysReg. + /// \praram Evictee The evictee vreg. + /// \return The Evictor vreg which evicted Evictee vreg from PhysReg. 0 if + /// nobody has evicted Evictee from PhysReg. + EvictorInfo getEvictor(unsigned Evictee) { + if (Evictees.count(Evictee)) { + return Evictees[Evictee]; + } + + return EvictorInfo(0, 0); + } + }; + + // Keeps track of past evictions in order to optimize region split decision. + EvictionTrack LastEvicted; + // splitting state. std::unique_ptr<SplitAnalysis> SA; std::unique_ptr<SplitEditor> SE; @@ -340,6 +399,10 @@ class RAGreedy : public MachineFunctionPass, /// obtained from the TargetSubtargetInfo. bool EnableLocalReassign; + /// Enable or not the the consideration of the cost of local intervals created + /// by a split candidate when choosing the best split candidate. + bool EnableAdvancedRASplitCost; + /// Set of broken hints that may be reconciled later because of eviction. 
SmallSetVector<LiveInterval *, 8> SetOfBrokenHints; @@ -382,13 +445,24 @@ private: bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&); void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>); void growRegion(GlobalSplitCandidate &Cand); - BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate&); + bool splitCanCauseEvictionChain(unsigned Evictee, GlobalSplitCandidate &Cand, + unsigned BBNumber, + const AllocationOrder &Order); + BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate &, + const AllocationOrder &Order, + bool *CanCauseEvictionChain); bool calcCompactRegion(GlobalSplitCandidate&); void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>); void calcGapWeights(unsigned, SmallVectorImpl<float>&); unsigned canReassign(LiveInterval &VirtReg, unsigned PhysReg); bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool); bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&); + bool canEvictInterferenceInRange(LiveInterval &VirtReg, unsigned PhysReg, + SlotIndex Start, SlotIndex End, + EvictionCost &MaxCost); + unsigned getCheapestEvicteeWeight(const AllocationOrder &Order, + LiveInterval &VirtReg, SlotIndex Start, + SlotIndex End, float *BestEvictWeight); void evictInterference(LiveInterval&, unsigned, SmallVectorImpl<unsigned>&); bool mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg, @@ -405,7 +479,8 @@ private: unsigned calculateRegionSplitCost(LiveInterval &VirtReg, AllocationOrder &Order, BlockFrequency &BestCost, - unsigned &NumCands, bool IgnoreCSR); + unsigned &NumCands, bool IgnoreCSR, + bool *CanCauseEvictionChain = nullptr); /// Perform region splitting. 
unsigned doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, bool HasCompact, @@ -546,14 +621,17 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { //===----------------------------------------------------------------------===// bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) { + LiveInterval &LI = LIS->getInterval(VirtReg); if (VRM->hasPhys(VirtReg)) { - LiveInterval &LI = LIS->getInterval(VirtReg); Matrix->unassign(LI); aboutToRemoveInterval(LI); return true; } // Unassigned virtreg is probably in the priority queue. // RegAllocBase will erase it after dequeueing. + // Nonetheless, clear the live-range so that the debug + // dump will show the right state for that VirtReg. + LI.clear(); return false; } @@ -685,7 +763,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, // preferred register. if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg)) if (Order.isHint(Hint)) { - DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n'); + DEBUG(dbgs() << "missed hint " << printReg(Hint, TRI) << '\n'); EvictionCost MaxCost; MaxCost.setBrokenHints(1); if (canEvictInterference(VirtReg, Hint, true, MaxCost)) { @@ -704,7 +782,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, if (!Cost) return PhysReg; - DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is available at cost " << Cost + DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost " << Cost << '\n'); unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost); return CheapReg ? 
CheapReg : PhysReg; @@ -734,7 +812,7 @@ unsigned RAGreedy::canReassign(LiveInterval &VirtReg, unsigned PrevReg) { } if (PhysReg) DEBUG(dbgs() << "can reassign: " << VirtReg << " from " - << PrintReg(PrevReg, TRI) << " to " << PrintReg(PhysReg, TRI) + << printReg(PrevReg, TRI) << " to " << printReg(PhysReg, TRI) << '\n'); return PhysReg; } @@ -856,6 +934,92 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, return true; } +/// \brief Return true if all interferences between VirtReg and PhysReg between +/// Start and End can be evicted. +/// +/// \param VirtReg Live range that is about to be assigned. +/// \param PhysReg Desired register for assignment. +/// \param Start Start of range to look for interferences. +/// \param End End of range to look for interferences. +/// \param MaxCost Only look for cheaper candidates and update with new cost +/// when returning true. +/// \return True when interference can be evicted cheaper than MaxCost. +bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg, + unsigned PhysReg, SlotIndex Start, + SlotIndex End, + EvictionCost &MaxCost) { + EvictionCost Cost; + + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); + + // Check if any interfering live range is heavier than MaxWeight. + for (unsigned i = Q.interferingVRegs().size(); i; --i) { + LiveInterval *Intf = Q.interferingVRegs()[i - 1]; + + // Check if interference overlast the segment in interest. + if (!Intf->overlaps(Start, End)) + continue; + + // Cannot evict non virtual reg interference. + if (!TargetRegisterInfo::isVirtualRegister(Intf->reg)) + return false; + // Never evict spill products. They cannot split or spill. + if (getStage(*Intf) == RS_Done) + return false; + + // Would this break a satisfied hint? + bool BreaksHint = VRM->hasPreferredPhys(Intf->reg); + // Update eviction cost. 
+ Cost.BrokenHints += BreaksHint; + Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight); + // Abort if this would be too expensive. + if (!(Cost < MaxCost)) + return false; + } + } + + if (Cost.MaxWeight == 0) + return false; + + MaxCost = Cost; + return true; +} + +/// \brief Return tthe physical register that will be best +/// candidate for eviction by a local split interval that will be created +/// between Start and End. +/// +/// \param Order The allocation order +/// \param VirtReg Live range that is about to be assigned. +/// \param Start Start of range to look for interferences +/// \param End End of range to look for interferences +/// \param BestEvictweight The eviction cost of that eviction +/// \return The PhysReg which is the best candidate for eviction and the +/// eviction cost in BestEvictweight +unsigned RAGreedy::getCheapestEvicteeWeight(const AllocationOrder &Order, + LiveInterval &VirtReg, + SlotIndex Start, SlotIndex End, + float *BestEvictweight) { + EvictionCost BestEvictCost; + BestEvictCost.setMax(); + BestEvictCost.MaxWeight = VirtReg.weight; + unsigned BestEvicteePhys = 0; + + // Go over all physical registers and find the best candidate for eviction + for (auto PhysReg : Order.getOrder()) { + + if (!canEvictInterferenceInRange(VirtReg, PhysReg, Start, End, + BestEvictCost)) + continue; + + // Best so far. + BestEvicteePhys = PhysReg; + } + *BestEvictweight = BestEvictCost.MaxWeight; + return BestEvicteePhys; +} + /// evictInterference - Evict any interferring registers that prevent VirtReg /// from being assigned to Physreg. This assumes that canEvictInterference /// returned true. 
@@ -868,7 +1032,7 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg, if (!Cascade) Cascade = ExtraRegInfo[VirtReg.reg].Cascade = NextCascade++; - DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI) + DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI) << " interference: Cascade " << Cascade << '\n'); // Collect all interfering virtregs first. @@ -890,6 +1054,9 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg, // The same VirtReg may be present in multiple RegUnits. Skip duplicates. if (!VRM->hasPhys(Intf->reg)) continue; + + LastEvicted.addEviction(PhysReg, VirtReg.reg, Intf->reg); + Matrix->unassign(*Intf); assert((ExtraRegInfo[Intf->reg].Cascade < Cascade || VirtReg.isSpillable() < Intf->isSpillable()) && @@ -957,8 +1124,8 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, // The first use of a callee-saved register in a function has cost 1. // Don't start using a CSR when the CostPerUseLimit is low. if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) { - DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " would clobber CSR " - << PrintReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI) + DEBUG(dbgs() << printReg(PhysReg, TRI) << " would clobber CSR " + << printReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI) << '\n'); continue; } @@ -1211,13 +1378,117 @@ BlockFrequency RAGreedy::calcSpillCost() { return Cost; } +/// \brief Check if splitting Evictee will create a local split interval in +/// basic block number BBNumber that may cause a bad eviction chain. 
This is +/// intended to prevent bad eviction sequences like: +/// movl %ebp, 8(%esp) # 4-byte Spill +/// movl %ecx, %ebp +/// movl %ebx, %ecx +/// movl %edi, %ebx +/// movl %edx, %edi +/// cltd +/// idivl %esi +/// movl %edi, %edx +/// movl %ebx, %edi +/// movl %ecx, %ebx +/// movl %ebp, %ecx +/// movl 16(%esp), %ebp # 4 - byte Reload +/// +/// Such sequences are created in 2 scenarios: +/// +/// Scenario #1: +/// %0 is evicted from physreg0 by %1. +/// Evictee %0 is intended for region splitting with split candidate +/// physreg0 (the reg %0 was evicted from). +/// Region splitting creates a local interval because of interference with the +/// evictor %1 (normally region spliitting creates 2 interval, the "by reg" +/// and "by stack" intervals and local interval created when interference +/// occurs). +/// One of the split intervals ends up evicting %2 from physreg1. +/// Evictee %2 is intended for region splitting with split candidate +/// physreg1. +/// One of the split intervals ends up evicting %3 from physreg2, etc. +/// +/// Scenario #2 +/// %0 is evicted from physreg0 by %1. +/// %2 is evicted from physreg2 by %3 etc. +/// Evictee %0 is intended for region splitting with split candidate +/// physreg1. +/// Region splitting creates a local interval because of interference with the +/// evictor %1. +/// One of the split intervals ends up evicting back original evictor %1 +/// from physreg0 (the reg %0 was evicted from). +/// Another evictee %2 is intended for region splitting with split candidate +/// physreg1. +/// One of the split intervals ends up evicting %3 from physreg2, etc. +/// +/// \param Evictee The register considered to be split. +/// \param Cand The split candidate that determines the physical register +/// we are splitting for and the interferences. +/// \param BBNumber The number of a BB for which the region split process will +/// create a local split interval. 
+/// \param Order The phisical registers that may get evicted by a split +/// artifact of Evictee. +/// \return True if splitting Evictee may cause a bad eviction chain, false +/// otherwise. +bool RAGreedy::splitCanCauseEvictionChain(unsigned Evictee, + GlobalSplitCandidate &Cand, + unsigned BBNumber, + const AllocationOrder &Order) { + EvictionTrack::EvictorInfo VregEvictorInfo = LastEvicted.getEvictor(Evictee); + unsigned Evictor = VregEvictorInfo.first; + unsigned PhysReg = VregEvictorInfo.second; + + // No actual evictor. + if (!Evictor || !PhysReg) + return false; + + float MaxWeight = 0; + unsigned FutureEvictedPhysReg = + getCheapestEvicteeWeight(Order, LIS->getInterval(Evictee), + Cand.Intf.first(), Cand.Intf.last(), &MaxWeight); + + // The bad eviction chain occurs when either the split candidate the the + // evited reg or one of the split artifact will evict the evicting reg. + if ((PhysReg != Cand.PhysReg) && (PhysReg != FutureEvictedPhysReg)) + return false; + + Cand.Intf.moveToBlock(BBNumber); + + // Check to see if the Evictor contains interference (with Evictee) in the + // given BB. If so, this interference caused the eviction of Evictee from + // PhysReg. This suggest that we will create a local interval during the + // region split to avoid this interference This local interval may cause a bad + // eviction chain. + if (!LIS->hasInterval(Evictor)) + return false; + LiveInterval &EvictorLI = LIS->getInterval(Evictor); + if (EvictorLI.FindSegmentContaining(Cand.Intf.first()) == EvictorLI.end()) + return false; + + // Now, check to see if the local interval we will create is going to be + // expensive enough to evict somebody If so, this may cause a bad eviction + // chain. 
+ VirtRegAuxInfo VRAI(*MF, *LIS, VRM, getAnalysis<MachineLoopInfo>(), *MBFI); + float splitArtifactWeight = + VRAI.futureWeight(LIS->getInterval(Evictee), + Cand.Intf.first().getPrevIndex(), Cand.Intf.last()); + if (splitArtifactWeight >= 0 && splitArtifactWeight < MaxWeight) + return false; + + return true; +} + /// calcGlobalSplitCost - Return the global split cost of following the split /// pattern in LiveBundles. This cost should be added to the local cost of the /// interference pattern in SplitConstraints. /// -BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { +BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand, + const AllocationOrder &Order, + bool *CanCauseEvictionChain) { BlockFrequency GlobalCost = 0; const BitVector &LiveBundles = Cand.LiveBundles; + unsigned VirtRegToSplit = SA->getParent().reg; ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); for (unsigned i = 0; i != UseBlocks.size(); ++i) { const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; @@ -1226,6 +1497,24 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, true)]; unsigned Ins = 0; + Cand.Intf.moveToBlock(BC.Number); + // Check wheather a local interval is going to be created during the region + // split. + if (EnableAdvancedRASplitCost && CanCauseEvictionChain && + Cand.Intf.hasInterference() && BI.LiveIn && BI.LiveOut && RegIn && + RegOut) { + + if (splitCanCauseEvictionChain(VirtRegToSplit, Cand, BC.Number, Order)) { + // This interfernce cause our eviction from this assignment, we might + // evict somebody else, add that cost. + // See splitCanCauseEvictionChain for detailed description of scenarios. 
+ GlobalCost += SpillPlacer->getBlockFrequency(BC.Number); + GlobalCost += SpillPlacer->getBlockFrequency(BC.Number); + + *CanCauseEvictionChain = true; + } + } + if (BI.LiveIn) Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg); if (BI.LiveOut) @@ -1246,6 +1535,20 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { if (Cand.Intf.hasInterference()) { GlobalCost += SpillPlacer->getBlockFrequency(Number); GlobalCost += SpillPlacer->getBlockFrequency(Number); + + // Check wheather a local interval is going to be created during the + // region split. + if (EnableAdvancedRASplitCost && CanCauseEvictionChain && + splitCanCauseEvictionChain(VirtRegToSplit, Cand, Number, Order)) { + // This interfernce cause our eviction from this assignment, we might + // evict somebody else, add that cost. + // See splitCanCauseEvictionChain for detailed description of + // scenarios. + GlobalCost += SpillPlacer->getBlockFrequency(Number); + GlobalCost += SpillPlacer->getBlockFrequency(Number); + + *CanCauseEvictionChain = true; + } } continue; } @@ -1309,7 +1612,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, // Create separate intervals for isolated blocks with multiple uses. if (!IntvIn && !IntvOut) { - DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n"); + DEBUG(dbgs() << printMBBReference(*BI.MBB) << " isolated.\n"); if (SA->shouldSplitSingleBlock(BI, SingleInstrs)) SE->splitSingleBlock(BI); continue; @@ -1410,6 +1713,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl<unsigned> &NewVRegs) { unsigned NumCands = 0; + BlockFrequency SpillCost = calcSpillCost(); BlockFrequency BestCost; // Check if we can split this live range around a compact region. 
@@ -1421,14 +1725,24 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, } else { // No benefit from the compact region, our fallback will be per-block // splitting. Make sure we find a solution that is cheaper than spilling. - BestCost = calcSpillCost(); + BestCost = SpillCost; DEBUG(dbgs() << "Cost of isolating all blocks = "; MBFI->printBlockFreq(dbgs(), BestCost) << '\n'); } + bool CanCauseEvictionChain = false; unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost, NumCands, - false/*IgnoreCSR*/); + false /*IgnoreCSR*/, &CanCauseEvictionChain); + + // Split candidates with compact regions can cause a bad eviction sequence. + // See splitCanCauseEvictionChain for detailed description of scenarios. + // To avoid it, we need to comapre the cost with the spill cost and not the + // current max frequency. + if (HasCompact && (BestCost > SpillCost) && (BestCand != NoCand) && + CanCauseEvictionChain) { + return 0; + } // No solutions found, fall back to single block splitting. 
if (!HasCompact && BestCand == NoCand) @@ -1440,8 +1754,8 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, AllocationOrder &Order, BlockFrequency &BestCost, - unsigned &NumCands, - bool IgnoreCSR) { + unsigned &NumCands, bool IgnoreCSR, + bool *CanCauseEvictionChain) { unsigned BestCand = NoCand; Order.rewind(); while (unsigned PhysReg = Order.next()) { @@ -1476,10 +1790,10 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, SpillPlacer->prepare(Cand.LiveBundles); BlockFrequency Cost; if (!addSplitConstraints(Cand.Intf, Cost)) { - DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n"); + DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tno positive bundles\n"); continue; } - DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = "; + DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tstatic = "; MBFI->printBlockFreq(dbgs(), Cost)); if (Cost >= BestCost) { DEBUG({ @@ -1487,7 +1801,7 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, dbgs() << " worse than no bundles\n"; else dbgs() << " worse than " - << PrintReg(GlobalCand[BestCand].PhysReg, TRI) << '\n'; + << printReg(GlobalCand[BestCand].PhysReg, TRI) << '\n'; }); continue; } @@ -1501,7 +1815,8 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, continue; } - Cost += calcGlobalSplitCost(Cand); + bool HasEvictionChain = false; + Cost += calcGlobalSplitCost(Cand, Order, &HasEvictionChain); DEBUG({ dbgs() << ", total = "; MBFI->printBlockFreq(dbgs(), Cost) << " with bundles"; @@ -1512,9 +1827,24 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, if (Cost < BestCost) { BestCand = NumCands; BestCost = Cost; + // See splitCanCauseEvictionChain for detailed description of bad + // eviction chain scenarios. 
+ if (CanCauseEvictionChain) + *CanCauseEvictionChain = HasEvictionChain; } ++NumCands; } + + if (CanCauseEvictionChain && BestCand != NoCand) { + // See splitCanCauseEvictionChain for detailed description of bad + // eviction chain scenarios. + DEBUG(dbgs() << "Best split candidate of vreg " + << printReg(VirtReg.reg, TRI) << " may "); + if (!(*CanCauseEvictionChain)) + DEBUG(dbgs() << "not "); + DEBUG(dbgs() << "cause bad eviction chain\n"); + } + return BestCand; } @@ -1535,7 +1865,7 @@ unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, if (unsigned B = Cand.getBundles(BundleCand, BestCand)) { UsedCands.push_back(BestCand); Cand.IntvIdx = SE->openIntv(); - DEBUG(dbgs() << "Split for " << PrintReg(Cand.PhysReg, TRI) << " in " + DEBUG(dbgs() << "Split for " << printReg(Cand.PhysReg, TRI) << " in " << B << " bundles, intv " << Cand.IntvIdx << ".\n"); (void)B; } @@ -1884,7 +2214,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, const bool LiveBefore = SplitBefore != 0 || BI.LiveIn; const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut; - DEBUG(dbgs() << PrintReg(PhysReg, TRI) << ' ' + DEBUG(dbgs() << printReg(PhysReg, TRI) << ' ' << Uses[SplitBefore] << '-' << Uses[SplitAfter] << " i=" << MaxGap); @@ -1985,7 +2315,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, for (unsigned i = 0, e = IntvMap.size(); i != e; ++i) if (IntvMap[i] == 1) { setStage(LIS->getInterval(LREdit.get(i)), RS_Split2); - DEBUG(dbgs() << PrintReg(LREdit.get(i))); + DEBUG(dbgs() << printReg(LREdit.get(i))); } DEBUG(dbgs() << '\n'); } @@ -2051,6 +2381,15 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, // Last Chance Recoloring //===----------------------------------------------------------------------===// +/// Return true if \p reg has any tied def operand. 
+static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg) { + for (const MachineOperand &MO : MRI->def_operands(reg)) + if (MO.isTied()) + return true; + + return false; +} + /// mayRecolorAllInterferences - Check if the virtual registers that /// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be /// recolored to free \p PhysReg. @@ -2079,10 +2418,13 @@ RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg, LiveInterval *Intf = Q.interferingVRegs()[i - 1]; // If Intf is done and sit on the same register class as VirtReg, // it would not be recolorable as it is in the same state as VirtReg. - if ((getStage(*Intf) == RS_Done && - MRI->getRegClass(Intf->reg) == CurRC) || + // However, if VirtReg has tied defs and Intf doesn't, then + // there is still a point in examining if it can be recolorable. + if (((getStage(*Intf) == RS_Done && + MRI->getRegClass(Intf->reg) == CurRC) && + !(hasTiedDef(MRI, VirtReg.reg) && !hasTiedDef(MRI, Intf->reg))) || FixedRegisters.count(Intf->reg)) { - DEBUG(dbgs() << "Early abort: the inteference is not recolorable.\n"); + DEBUG(dbgs() << "Early abort: the interference is not recolorable.\n"); return false; } RecoloringCandidates.insert(Intf); @@ -2162,7 +2504,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, Order.rewind(); while (unsigned PhysReg = Order.next()) { DEBUG(dbgs() << "Try to assign: " << VirtReg << " to " - << PrintReg(PhysReg, TRI) << '\n'); + << printReg(PhysReg, TRI) << '\n'); RecoloringCandidates.clear(); VirtRegToPhysReg.clear(); CurrentNewVRegs.clear(); @@ -2170,7 +2512,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, // It is only possible to recolor virtual register interference. 
if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) { - DEBUG(dbgs() << "Some inteferences are not with virtual registers.\n"); + DEBUG(dbgs() << "Some interferences are not with virtual registers.\n"); continue; } @@ -2179,7 +2521,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, // the interferences. if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates, FixedRegisters)) { - DEBUG(dbgs() << "Some inteferences cannot be recolored.\n"); + DEBUG(dbgs() << "Some interferences cannot be recolored.\n"); continue; } @@ -2222,7 +2564,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, } DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to " - << PrintReg(PhysReg, TRI) << '\n'); + << printReg(PhysReg, TRI) << '\n'); // The recoloring attempt failed, undo the changes. FixedRegisters = SaveFixedRegisters; @@ -2285,7 +2627,7 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue, continue; } DEBUG(dbgs() << "Recoloring of " << *LI - << " succeeded with: " << PrintReg(PhysReg, TRI) << '\n'); + << " succeeded with: " << printReg(PhysReg, TRI) << '\n'); Matrix->assign(*LI, PhysReg); FixedRegisters.insert(LI->reg); @@ -2300,7 +2642,7 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue, unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, SmallVectorImpl<unsigned> &NewVRegs) { CutOffInfo = CO_None; - LLVMContext &Ctx = MF->getFunction()->getContext(); + LLVMContext &Ctx = MF->getFunction().getContext(); SmallVirtRegSet FixedRegisters; unsigned Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters); if (Reg == ~0U && (CutOffInfo != CO_None)) { @@ -2452,8 +2794,8 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { Visited.insert(Reg); RecoloringCandidates.push_back(Reg); - DEBUG(dbgs() << "Trying to reconcile hints for: " << PrintReg(Reg, TRI) << '(' - << PrintReg(PhysReg, TRI) << ")\n"); + DEBUG(dbgs() << "Trying to reconcile hints for: " << printReg(Reg, 
TRI) << '(' + << printReg(PhysReg, TRI) << ")\n"); do { Reg = RecoloringCandidates.pop_back_val(); @@ -2474,7 +2816,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { Matrix->checkInterference(LI, PhysReg))) continue; - DEBUG(dbgs() << PrintReg(Reg, TRI) << '(' << PrintReg(CurrPhys, TRI) + DEBUG(dbgs() << printReg(Reg, TRI) << '(' << printReg(CurrPhys, TRI) << ") is recolorable.\n"); // Gather the hint info. @@ -2565,6 +2907,8 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, // First try assigning a free register. AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix); if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) { + // If VirtReg got an assignment, the eviction info is no longre relevant. + LastEvicted.clearEvicteeInfo(VirtReg.reg); // When NewVRegs is not empty, we may have made decisions such as evicting // a virtual register, go with the earlier decisions and use the physical // register. @@ -2598,6 +2942,9 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, // copy-related live-ranges. if (Hint && Hint != PhysReg) SetOfBrokenHints.insert(&VirtReg); + // If VirtReg eviction someone, the eviction info for it as an evictee is + // no longre relevant. + LastEvicted.clearEvicteeInfo(VirtReg.reg); return PhysReg; } @@ -2617,8 +2964,11 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, // Try splitting VirtReg or interferences. unsigned NewVRegSizeBefore = NewVRegs.size(); unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs); - if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore)) + if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore)) { + // If VirtReg got split, the eviction info is no longre relevant. 
+ LastEvicted.clearEvicteeInfo(VirtReg.reg); return PhysReg; + } } // If we couldn't allocate a register from spilling, there is probably some @@ -2702,17 +3052,20 @@ void RAGreedy::reportNumberOfSplillsReloads(MachineLoop *L, unsigned &Reloads, if (Reloads || FoldedReloads || Spills || FoldedSpills) { using namespace ore; - MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReload", - L->getStartLoc(), L->getHeader()); - if (Spills) - R << NV("NumSpills", Spills) << " spills "; - if (FoldedSpills) - R << NV("NumFoldedSpills", FoldedSpills) << " folded spills "; - if (Reloads) - R << NV("NumReloads", Reloads) << " reloads "; - if (FoldedReloads) - R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads "; - ORE->emit(R << "generated in loop"); + ORE->emit([&]() { + MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReload", + L->getStartLoc(), L->getHeader()); + if (Spills) + R << NV("NumSpills", Spills) << " spills "; + if (FoldedSpills) + R << NV("NumFoldedSpills", FoldedSpills) << " folded spills "; + if (Reloads) + R << NV("NumReloads", Reloads) << " reloads "; + if (FoldedReloads) + R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads "; + R << "generated in loop"; + return R; + }); } } @@ -2729,6 +3082,9 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { MF->getSubtarget().enableRALocalReassignment( MF->getTarget().getOptLevel()); + EnableAdvancedRASplitCost = ConsiderLocalIntervalCost || + MF->getSubtarget().enableAdvancedRASplitCost(); + if (VerifyEnabled) MF->verify(this, "Before greedy register allocator"); @@ -2760,6 +3116,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI); GlobalCand.resize(32); // This will grow as needed. 
SetOfBrokenHints.clear(); + LastEvicted.clear(); allocatePhysRegs(); tryHintsRecoloring(); diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 9778103575fa..351e91c932eb 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -43,7 +43,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" @@ -59,6 +59,8 @@ #include "llvm/CodeGen/PBQPRAConstraint.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" @@ -70,8 +72,6 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/Printable.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cassert> #include <cstddef> @@ -668,7 +668,7 @@ void RegAllocPBQP::spillVReg(unsigned VReg, const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); (void)TRI; - DEBUG(dbgs() << "VREG " << PrintReg(VReg, &TRI) << " -> SPILLED (Cost: " + DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> SPILLED (Cost: " << LRE.getParent().weight << ", New vregs: "); // Copy any newly inserted live intervals into the list of regs to @@ -677,7 +677,7 @@ void RegAllocPBQP::spillVReg(unsigned VReg, I != E; ++I) { const LiveInterval &LI = LIS.getInterval(*I); assert(!LI.empty() && "Empty spill range."); - DEBUG(dbgs() << PrintReg(LI.reg, &TRI) << " "); + DEBUG(dbgs() << printReg(LI.reg, &TRI) << " "); VRegsToAlloc.insert(LI.reg); } @@ -707,7 +707,7 @@ bool 
RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G, if (AllocOption != PBQP::RegAlloc::getSpillOptionIdx()) { unsigned PReg = G.getNodeMetadata(NId).getAllowedRegs()[AllocOption - 1]; - DEBUG(dbgs() << "VREG " << PrintReg(VReg, &TRI) << " -> " + DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> " << TRI.getName(PReg) << "\n"); assert(PReg != 0 && "Invalid preg selected."); VRM.assignVirt2Phys(VReg, PReg); @@ -799,7 +799,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { findVRegIntervalsToAlloc(MF, LIS); #ifndef NDEBUG - const Function &F = *MF.getFunction(); + const Function &F = MF.getFunction(); std::string FullyQualifiedName = F.getParent()->getModuleIdentifier() + "." + F.getName().str(); #endif @@ -864,7 +864,7 @@ static Printable PrintNodeInfo(PBQP::RegAlloc::PBQPRAGraph::NodeId NId, const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); unsigned VReg = G.getNodeMetadata(NId).getVReg(); const char *RegClassName = TRI->getRegClassName(MRI.getRegClass(VReg)); - OS << NId << " (" << RegClassName << ':' << PrintReg(VReg, TRI) << ')'; + OS << NId << " (" << RegClassName << ':' << printReg(VReg, TRI) << ')'; }); } diff --git a/lib/CodeGen/RegUsageInfoCollector.cpp b/lib/CodeGen/RegUsageInfoCollector.cpp index 855aa37ff3c3..f49ea25bbf35 100644 --- a/lib/CodeGen/RegUsageInfoCollector.cpp +++ b/lib/CodeGen/RegUsageInfoCollector.cpp @@ -27,7 +27,7 @@ #include "llvm/CodeGen/RegisterUsageInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" using namespace llvm; @@ -95,7 +95,7 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) { unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32; RegMask.resize(RegMaskSize, 0xFFFFFFFF); - const Function *F = MF.getFunction(); + const Function &F = MF.getFunction(); PhysicalRegisterUsageInfo *PRUI = &getAnalysis<PhysicalRegisterUsageInfo>(); @@ -127,10 +127,12 @@ bool 
RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) { if (!TargetFrameLowering::isSafeForNoCSROpt(F)) { const uint32_t *CallPreservedMask = - TRI->getCallPreservedMask(MF, F->getCallingConv()); - // Set callee saved register as preserved. - for (unsigned i = 0; i < RegMaskSize; ++i) - RegMask[i] = RegMask[i] | CallPreservedMask[i]; + TRI->getCallPreservedMask(MF, F.getCallingConv()); + if (CallPreservedMask) { + // Set callee saved register as preserved. + for (unsigned i = 0; i < RegMaskSize; ++i) + RegMask[i] = RegMask[i] | CallPreservedMask[i]; + } } else { ++NumCSROpt; DEBUG(dbgs() << MF.getName() @@ -139,11 +141,11 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) { for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) if (MachineOperand::clobbersPhysReg(&(RegMask[0]), PReg)) - DEBUG(dbgs() << TRI->getName(PReg) << " "); + DEBUG(dbgs() << printReg(PReg, TRI) << " "); DEBUG(dbgs() << " \n----------------------------------------\n"); - PRUI->storeUpdateRegUsageInfo(F, std::move(RegMask)); + PRUI->storeUpdateRegUsageInfo(&F, std::move(RegMask)); return false; } diff --git a/lib/CodeGen/RegUsageInfoPropagate.cpp b/lib/CodeGen/RegUsageInfoPropagate.cpp index 5cc35bfeca63..5b12d00e126f 100644 --- a/lib/CodeGen/RegUsageInfoPropagate.cpp +++ b/lib/CodeGen/RegUsageInfoPropagate.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" @@ -87,14 +88,31 @@ void RegUsageInfoPropagationPass::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } +// Assumes call instructions have a single reference to a function. 
+static const Function *findCalledFunction(const Module &M, MachineInstr &MI) { + for (MachineOperand &MO : MI.operands()) { + if (MO.isGlobal()) + return dyn_cast<Function>(MO.getGlobal()); + + if (MO.isSymbol()) + return M.getFunction(MO.getSymbolName()); + } + + return nullptr; +} + bool RegUsageInfoPropagationPass::runOnMachineFunction(MachineFunction &MF) { - const Module *M = MF.getFunction()->getParent(); + const Module *M = MF.getFunction().getParent(); PhysicalRegisterUsageInfo *PRUI = &getAnalysis<PhysicalRegisterUsageInfo>(); DEBUG(dbgs() << " ++++++++++++++++++++ " << getPassName() << " ++++++++++++++++++++ \n"); DEBUG(dbgs() << "MachineFunction : " << MF.getName() << "\n"); + const MachineFrameInfo &MFI = MF.getFrameInfo(); + if (!MFI.hasCalls() && !MFI.hasTailCall()) + return false; + bool Changed = false; for (MachineBasicBlock &MBB : MF) { @@ -113,15 +131,14 @@ bool RegUsageInfoPropagationPass::runOnMachineFunction(MachineFunction &MF) { Changed = true; }; - MachineOperand &Operand = MI.getOperand(0); - if (Operand.isGlobal()) - UpdateRegMask(cast<Function>(Operand.getGlobal())); - else if (Operand.isSymbol()) - UpdateRegMask(M->getFunction(Operand.getSymbolName())); + if (const Function *F = findCalledFunction(*M, MI)) { + UpdateRegMask(F); + } else { + DEBUG(dbgs() << "Failed to find call target function\n"); + } - DEBUG(dbgs() - << "Call Instruction After Register Usage Info Propagation : \n"); - DEBUG(dbgs() << MI << "\n"); + DEBUG(dbgs() << "Call Instruction After Register Usage Info Propagation : " + << MI << '\n'); } } diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp index 956dec39fc38..b0eeb81f583e 100644 --- a/lib/CodeGen/RegisterClassInfo.cpp +++ b/lib/CodeGen/RegisterClassInfo.cpp @@ -20,13 +20,13 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include 
"llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -153,7 +153,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { DEBUG({ dbgs() << "AllocationOrder(" << TRI->getRegClassName(RC) << ") = ["; for (unsigned I = 0; I != RCI.NumRegs; ++I) - dbgs() << ' ' << PrintReg(RCI.Order[I], TRI); + dbgs() << ' ' << printReg(RCI.Order[I], TRI); dbgs() << (RCI.ProperSubClass ? " ] (sub-class)\n" : " ]\n"); }); diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index a67d07b36474..00a2e93c71ca 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -1,4 +1,4 @@ -//===- RegisterCoalescer.cpp - Generic Register Coalescing Interface -------==// +//===- RegisterCoalescer.cpp - Generic Register Coalescing Interface ------===// // // The LLVM Compiler Infrastructure // @@ -14,32 +14,49 @@ //===----------------------------------------------------------------------===// #include "RegisterCoalescer.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveRangeEdit.h" -#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include 
"llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterClassInfo.h" -#include "llvm/CodeGen/VirtRegMap.h" -#include "llvm/IR/Value.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/MC/LaneBitmask.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> -#include <cmath> +#include <cassert> +#include <iterator> +#include <limits> +#include <tuple> +#include <utility> +#include <vector> + using namespace llvm; #define DEBUG_TYPE "regalloc" @@ -53,10 +70,9 @@ STATISTIC(NumInflated , "Number of register classes inflated"); STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested"); STATISTIC(NumLaneResolves, "Number of dead lane conflicts resolved"); -static cl::opt<bool> -EnableJoining("join-liveintervals", - cl::desc("Coalesce copies (default=true)"), - cl::init(true)); +static cl::opt<bool> EnableJoining("join-liveintervals", + cl::desc("Coalesce copies (default=true)"), + cl::init(true), cl::Hidden); static cl::opt<bool> UseTerminalRule("terminal-rule", cl::desc("Apply the terminal rule"), @@ -79,11 +95,11 @@ VerifyCoalescing("verify-coalescing", cl::Hidden); namespace { + class RegisterCoalescer : public 
MachineFunctionPass, private LiveRangeEdit::Delegate { MachineFunction* MF; MachineRegisterInfo* MRI; - const TargetMachine* TM; const TargetRegisterInfo* TRI; const TargetInstrInfo* TII; LiveIntervals *LIS; @@ -211,9 +227,9 @@ namespace { /// flag. /// This can happen when undef uses were previously concealed by a copy /// which we coalesced. Example: - /// %vreg0:sub0<def,read-undef> = ... - /// %vreg1 = COPY %vreg0 <-- Coalescing COPY reveals undef - /// = use %vreg1:sub1 <-- hidden undef use + /// %0:sub0<def,read-undef> = ... + /// %1 = COPY %0 <-- Coalescing COPY reveals undef + /// = use %1:sub1 <-- hidden undef use void addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx, MachineOperand &MO, unsigned SubRegIdx); @@ -248,8 +264,19 @@ namespace { } } + /// Wrapper Method to do all the necessary work when an Instruction is + /// deleted. + /// Optimizations should use this to make sure that deleted instructions + /// are always accounted for. + void deleteInstr(MachineInstr* MI) { + ErasedInstrs.insert(MI); + LIS->RemoveMachineInstrFromMaps(*MI); + MI->eraseFromParent(); + } + public: static char ID; ///< Class identification, replacement for typeinfo + RegisterCoalescer() : MachineFunctionPass(ID) { initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); } @@ -264,8 +291,11 @@ namespace { /// Implement the dump method. 
void print(raw_ostream &O, const Module* = nullptr) const override; }; + } // end anonymous namespace +char RegisterCoalescer::ID = 0; + char &llvm::RegisterCoalescerID = RegisterCoalescer::ID; INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing", @@ -277,8 +307,6 @@ INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing", "Simple Register Coalescing", false, false) -char RegisterCoalescer::ID = 0; - static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI, unsigned &Src, unsigned &Dst, unsigned &SrcSub, unsigned &DstSub) { @@ -334,7 +362,7 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) { Flipped = true; } - const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo(); if (TargetRegisterInfo::isPhysicalRegister(Dst)) { // Eliminate DstSub on a physreg. @@ -540,7 +568,7 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, // in IntB, we can merge them. if (ValS+1 != BS) return false; - DEBUG(dbgs() << "Extending: " << PrintReg(IntB.reg, TRI)); + DEBUG(dbgs() << "Extending: " << printReg(IntB.reg, TRI)); SlotIndex FillerStart = ValS->end, FillerEnd = BS->start; // We are about to delete CopyMI, so need to remove it as the 'instruction @@ -616,8 +644,7 @@ bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA, /// Copy segements with value number @p SrcValNo from liverange @p Src to live /// range @Dst and use value number @p DstValNo there. 
static void addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo, - const LiveRange &Src, const VNInfo *SrcValNo) -{ + const LiveRange &Src, const VNInfo *SrcValNo) { for (const LiveRange::Segment &S : Src.segments) { if (S.valno != SrcValNo) continue; @@ -640,7 +667,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, // its other operand is coalesced to the copy dest register, see if we can // transform the copy into a noop by commuting the definition. For example, // - // A3 = op A2 B0<kill> + // A3 = op A2 killed B0 // ... // B1 = A3 <- this copy // ... @@ -648,7 +675,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, // // ==> // - // B2 = op B0 A2<kill> + // B2 = op B0 killed A2 // ... // B1 = B2 <- now an identity copy // ... @@ -741,7 +768,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, // ... // B = A // ... - // C = A<kill> + // C = killed A // ... // = B @@ -797,9 +824,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, S.MergeValueNumberInto(SubDVNI, SubBValNo); } - ErasedInstrs.insert(UseMI); - LIS->RemoveMachineInstrFromMaps(*UseMI); - UseMI->eraseFromParent(); + deleteInstr(UseMI); } // Extend BValNo by merging in IntA live segments of AValNo. Val# definition @@ -966,8 +991,8 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP, // Now ok to move copy. if (CopyLeftBB) { - DEBUG(dbgs() << "\tremovePartialRedundancy: Move the copy to BB#" - << CopyLeftBB->getNumber() << '\t' << CopyMI); + DEBUG(dbgs() << "\tremovePartialRedundancy: Move the copy to " + << printMBBReference(*CopyLeftBB) << '\t' << CopyMI); // Insert new copy to CopyLeftBB. auto InsPos = CopyLeftBB->getFirstTerminator(); @@ -985,18 +1010,16 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP, // the deleted list. 
ErasedInstrs.erase(NewCopyMI); } else { - DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from BB#" - << MBB.getNumber() << '\t' << CopyMI); + DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from " + << printMBBReference(MBB) << '\t' << CopyMI); } // Remove CopyMI. // Note: This is fine to remove the copy before updating the live-ranges. // While updating the live-ranges, we only look at slot indices and // never go back to the instruction. - LIS->RemoveMachineInstrFromMaps(CopyMI); // Mark instructions as deleted. - ErasedInstrs.insert(&CopyMI); - CopyMI.eraseFromParent(); + deleteInstr(&CopyMI); // Update the liveness. SmallVector<SlotIndex, 8> EndPoints; @@ -1119,10 +1142,10 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, NewMI.setDebugLoc(DL); // In a situation like the following: - // %vreg0:subreg = instr ; DefMI, subreg = DstIdx - // %vreg1 = copy %vreg0:subreg ; CopyMI, SrcIdx = 0 - // instead of widening %vreg1 to the register class of %vreg0 simply do: - // %vreg1 = instr + // %0:subreg = instr ; DefMI, subreg = DstIdx + // %1 = copy %0:subreg ; CopyMI, SrcIdx = 0 + // instead of widening %1 to the register class of %0 simply do: + // %1 = instr const TargetRegisterClass *NewRC = CP.getNewRC(); if (DstIdx != 0) { MachineOperand &DefMO = NewMI.getOperand(0); @@ -1202,12 +1225,12 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, // This could happen if the rematerialization instruction is rematerializing // more than actually is used in the register. // An example would be: - // vreg1 = LOAD CONSTANTS 5, 8 ; Loading both 5 and 8 in different subregs + // %1 = LOAD CONSTANTS 5, 8 ; Loading both 5 and 8 in different subregs // ; Copying only part of the register here, but the rest is undef. 
- // vreg2:sub_16bit<def, read-undef> = COPY vreg1:sub_16bit + // %2:sub_16bit<def, read-undef> = COPY %1:sub_16bit // ==> // ; Materialize all the constants but only using one - // vreg2 = LOAD_CONSTANTS 5, 8 + // %2 = LOAD_CONSTANTS 5, 8 // // at this point for the part that wasn't defined before we could have // subranges missing the definition. @@ -1230,11 +1253,11 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, // Make sure that the subrange for resultant undef is removed // For example: - // vreg1:sub1<def,read-undef> = LOAD CONSTANT 1 - // vreg2<def> = COPY vreg1 + // %1:sub1<def,read-undef> = LOAD CONSTANT 1 + // %2 = COPY %1 // ==> - // vreg2:sub1<def, read-undef> = LOAD CONSTANT 1 - // ; Correct but need to remove the subrange for vreg2:sub0 + // %2:sub1<def, read-undef> = LOAD CONSTANT 1 + // ; Correct but need to remove the subrange for %2:sub0 // ; as it is now undef if (NewIdx != 0 && DstInt.hasSubRanges()) { // The affected subregister segments can be removed. @@ -1268,15 +1291,15 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, // Otherwise, variables that live through may miss some // interferences, thus creating invalid allocation. // E.g., i386 code: - // vreg1 = somedef ; vreg1 GR8 - // vreg2 = remat ; vreg2 GR32 - // CL = COPY vreg2.sub_8bit - // = somedef vreg1 ; vreg1 GR8 + // %1 = somedef ; %1 GR8 + // %2 = remat ; %2 GR32 + // CL = COPY %2.sub_8bit + // = somedef %1 ; %1 GR8 // => - // vreg1 = somedef ; vreg1 GR8 - // ECX<def, dead> = remat ; CL<imp-def> - // = somedef vreg1 ; vreg1 GR8 - // vreg1 will see the inteferences with CL but not with CH since + // %1 = somedef ; %1 GR8 + // dead ECX = remat ; implicit-def CL + // = somedef %1 ; %1 GR8 + // %1 will see the interferences with CL but not with CH since // no live-ranges would have been created for ECX. // Fix that! 
SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI); @@ -1313,6 +1336,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, MachineInstr *UseMI = UseMO.getParent(); if (UseMI->isDebugValue()) { UseMO.setReg(DstReg); + // Move the debug value directly after the def of the rematerialized + // value in DstReg. + MBB->splice(std::next(NewMI.getIterator()), UseMI->getParent(), UseMI); DEBUG(dbgs() << "\t\tupdated: " << *UseMI); } } @@ -1326,9 +1352,9 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { // ProcessImpicitDefs may leave some copies of <undef> values, it only removes // local variables. When we have a copy like: // - // %vreg1 = COPY %vreg2<undef> + // %1 = COPY undef %2 // - // We delete the copy and remove the corresponding value number from %vreg1. + // We delete the copy and remove the corresponding value number from %1. // Any uses of that value number are marked as <undef>. // Note that we do not query CoalescerPair here but redo isMoveInstr as the @@ -1540,7 +1566,6 @@ bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) { } bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { - Again = false; DEBUG(dbgs() << LIS->getInstructionIndex(*CopyMI) << '\t' << *CopyMI); @@ -1560,7 +1585,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { std::swap(SrcRC, DstRC); } if (!TRI->shouldCoalesce(CopyMI, SrcRC, SrcIdx, DstRC, DstIdx, - CP.getNewRC())) { + CP.getNewRC(), *LIS)) { DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n"); return false; } @@ -1578,8 +1603,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { // Eliminate undefs. if (!CP.isPhys() && eliminateUndefCopy(CopyMI)) { - LIS->RemoveMachineInstrFromMaps(*CopyMI); - CopyMI->eraseFromParent(); + deleteInstr(CopyMI); return false; // Not coalescable. 
} @@ -1607,15 +1631,14 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { } DEBUG(dbgs() << "\tMerged values: " << LI << '\n'); } - LIS->RemoveMachineInstrFromMaps(*CopyMI); - CopyMI->eraseFromParent(); + deleteInstr(CopyMI); return true; } // Enforce policies. if (CP.isPhys()) { - DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), TRI) - << " with " << PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx()) + DEBUG(dbgs() << "\tConsidering merging " << printReg(CP.getSrcReg(), TRI) + << " with " << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n'); if (!canJoinPhys(CP)) { // Before giving up coalescing, if definition of source is defined by @@ -1637,13 +1660,13 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { dbgs() << "\tConsidering merging to " << TRI->getRegClassName(CP.getNewRC()) << " with "; if (CP.getDstIdx() && CP.getSrcIdx()) - dbgs() << PrintReg(CP.getDstReg()) << " in " + dbgs() << printReg(CP.getDstReg()) << " in " << TRI->getSubRegIndexName(CP.getDstIdx()) << " and " - << PrintReg(CP.getSrcReg()) << " in " + << printReg(CP.getSrcReg()) << " in " << TRI->getSubRegIndexName(CP.getSrcIdx()) << '\n'; else - dbgs() << PrintReg(CP.getSrcReg(), TRI) << " in " - << PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n'; + dbgs() << printReg(CP.getSrcReg(), TRI) << " in " + << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n'; }); } @@ -1668,8 +1691,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { if (!CP.isPartial() && !CP.isPhys()) { if (adjustCopiesBackFrom(CP, CopyMI) || removeCopyByCommutingDef(CP, CopyMI)) { - LIS->RemoveMachineInstrFromMaps(*CopyMI); - CopyMI->eraseFromParent(); + deleteInstr(CopyMI); DEBUG(dbgs() << "\tTrivial!\n"); return true; } @@ -1735,11 +1757,11 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { TRI->updateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF); DEBUG({ - dbgs() << "\tSuccess: " << PrintReg(CP.getSrcReg(), TRI, 
CP.getSrcIdx()) - << " -> " << PrintReg(CP.getDstReg(), TRI, CP.getDstIdx()) << '\n'; + dbgs() << "\tSuccess: " << printReg(CP.getSrcReg(), TRI, CP.getSrcIdx()) + << " -> " << printReg(CP.getDstReg(), TRI, CP.getDstIdx()) << '\n'; dbgs() << "\tResult = "; if (CP.isPhys()) - dbgs() << PrintReg(CP.getDstReg(), TRI); + dbgs() << printReg(CP.getDstReg(), TRI); else dbgs() << LIS->getInterval(CP.getDstReg()); dbgs() << '\n'; @@ -1774,7 +1796,7 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { return false; } if (RHS.overlaps(LIS->getRegUnit(*UI))) { - DEBUG(dbgs() << "\t\tInterference: " << PrintRegUnit(*UI, TRI) << '\n'); + DEBUG(dbgs() << "\t\tInterference: " << printRegUnit(*UI, TRI) << '\n'); return false; } } @@ -1797,20 +1819,20 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { MachineInstr *CopyMI; if (CP.isFlipped()) { // Physreg is copied into vreg - // %vregY = COPY %X - // ... //< no other def of %X here - // use %vregY + // %y = COPY %physreg_x + // ... //< no other def of %physreg_x here + // use %y // => // ... - // use %X + // use %physreg_x CopyMI = MRI->getVRegDef(SrcReg); } else { // VReg is copied into physreg: - // %vregX = def - // ... //< no other def or use of %Y here - // %Y = COPY %vregX + // %y = def + // ... //< no other def or use of %physreg_x here + // %physreg_x = COPY %y // => - // %Y = def + // %physreg_x = def // ... if (!MRI->hasOneNonDBGUse(SrcReg)) { DEBUG(dbgs() << "\t\tMultiple vreg uses!\n"); @@ -1829,7 +1851,7 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { if (!MRI->isConstantPhysReg(DstReg)) { // We checked above that there are no interfering defs of the physical - // register. However, for this case, where we intent to move up the def of + // register. However, for this case, where we intend to move up the def of // the physical register, we also need to check for interfering uses. 
SlotIndexes *Indexes = LIS->getSlotIndexes(); for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx); @@ -1844,7 +1866,7 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { // We're going to remove the copy which defines a physical reserved // register, so remove its valno, etc. - DEBUG(dbgs() << "\t\tRemoving phys reg def of " << PrintReg(DstReg, TRI) + DEBUG(dbgs() << "\t\tRemoving phys reg def of " << printReg(DstReg, TRI) << " at " << CopyRegIdx << "\n"); LIS->removePhysRegDefAt(DstReg, CopyRegIdx); @@ -1855,8 +1877,7 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { } } - LIS->RemoveMachineInstrFromMaps(*CopyMI); - CopyMI->eraseFromParent(); + deleteInstr(CopyMI); // We don't track kills for reserved registers. MRI->clearKillFlags(CP.getSrcReg()); @@ -1906,7 +1927,7 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { // // %dst:ssub0<def,read-undef> = FOO // %src = BAR -// %dst:ssub1<def> = COPY %src +// %dst:ssub1 = COPY %src // // The live range of %src overlaps the %dst value defined by FOO, but // merging %src into %dst:ssub1 is only going to clobber the ssub1 lane @@ -1921,14 +1942,15 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { // is live, but never read. This can happen because we don't compute // individual live ranges per lane. // -// %dst<def> = FOO +// %dst = FOO // %src = BAR -// %dst:ssub1<def> = COPY %src +// %dst:ssub1 = COPY %src // // This kind of interference is only resolved locally. If the clobbered // lane value escapes the block, the join is aborted. namespace { + /// Track information about values in a single virtual register about to be /// joined. Objects of this class are always created in pairs - one for each /// side of the CoalescerPair (or one for each lane of a side of the coalescer @@ -1936,6 +1958,7 @@ namespace { class JoinVals { /// Live range we work on. LiveRange &LR; + /// (Main) register we work on. 
const unsigned Reg; @@ -1943,6 +1966,7 @@ class JoinVals { /// subregister SubIdx in the coalesced register. Either CP.DstIdx or /// CP.SrcIdx. const unsigned SubIdx; + /// The LaneMask that this liverange will occupy the coalesced register. May /// be smaller than the lanemask produced by SubIdx when merging subranges. const LaneBitmask LaneMask; @@ -1950,6 +1974,7 @@ class JoinVals { /// This is true when joining sub register ranges, false when joining main /// ranges. const bool SubRangeJoin; + /// Whether the current LiveInterval tracks subregister liveness. const bool TrackSubRegLiveness; @@ -1997,7 +2022,7 @@ class JoinVals { /// joined register, so they can be compared directly between SrcReg and /// DstReg. struct Val { - ConflictResolution Resolution; + ConflictResolution Resolution = CR_Keep; /// Lanes written by this def, 0 for unanalyzed values. LaneBitmask WriteLanes; @@ -2007,10 +2032,10 @@ class JoinVals { LaneBitmask ValidLanes; /// Value in LI being redefined by this def. - VNInfo *RedefVNI; + VNInfo *RedefVNI = nullptr; /// Value in the other live range that overlaps this def, if any. - VNInfo *OtherVNI; + VNInfo *OtherVNI = nullptr; /// Is this value an IMPLICIT_DEF that can be erased? /// @@ -2023,18 +2048,16 @@ class JoinVals { /// ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with /// longer live ranges. Such IMPLICIT_DEF values should be treated like /// normal values. - bool ErasableImplicitDef; + bool ErasableImplicitDef = false; /// True when the live range of this value will be pruned because of an /// overlapping CR_Replace value in the other live range. - bool Pruned; + bool Pruned = false; /// True once Pruned above has been computed. 
- bool PrunedComputed; + bool PrunedComputed = false; - Val() : Resolution(CR_Keep), WriteLanes(), ValidLanes(), - RedefVNI(nullptr), OtherVNI(nullptr), ErasableImplicitDef(false), - Pruned(false), PrunedComputed(false) {} + Val() = default; bool isAnalyzed() const { return WriteLanes.any(); } }; @@ -2081,8 +2104,9 @@ class JoinVals { /// entry to TaintedVals. /// /// Returns false if the tainted lanes extend beyond the basic block. - bool taintExtent(unsigned, LaneBitmask, JoinVals&, - SmallVectorImpl<std::pair<SlotIndex, LaneBitmask> >&); + bool + taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other, + SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent); /// Return true if MI uses any of the given Lanes from Reg. /// This does not include partial redefinitions of Reg. @@ -2104,8 +2128,7 @@ public: : LR(LR), Reg(Reg), SubIdx(SubIdx), LaneMask(LaneMask), SubRangeJoin(SubRangeJoin), TrackSubRegLiveness(TrackSubRegLiveness), NewVNInfo(newVNInfo), CP(cp), LIS(lis), Indexes(LIS->getSlotIndexes()), - TRI(TRI), Assignments(LR.getNumValNums(), -1), Vals(LR.getNumValNums()) - {} + TRI(TRI), Assignments(LR.getNumValNums(), -1), Vals(LR.getNumValNums()) {} /// Analyze defs in LR and compute a value mapping in NewVNInfo. /// Returns false if any conflicts were impossible to resolve. @@ -2149,6 +2172,7 @@ public: /// Get the value assignments suitable for passing to LiveInterval::join. const int *getAssignments() const { return Assignments.data(); } }; + } // end anonymous namespace LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef) @@ -2239,7 +2263,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { const MachineInstr *DefMI = nullptr; if (VNI->isPHIDef()) { // Conservatively assume that all lanes in a PHI are valid. - LaneBitmask Lanes = SubRangeJoin ? LaneBitmask(1) + LaneBitmask Lanes = SubRangeJoin ? 
LaneBitmask::getLane(0) : TRI->getSubRegIndexLaneMask(SubIdx); V.ValidLanes = V.WriteLanes = Lanes; } else { @@ -2247,7 +2271,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { assert(DefMI != nullptr); if (SubRangeJoin) { // We don't care about the lanes when joining subregister ranges. - V.WriteLanes = V.ValidLanes = LaneBitmask(1); + V.WriteLanes = V.ValidLanes = LaneBitmask::getLane(0); if (DefMI->isImplicitDef()) { V.ValidLanes = LaneBitmask::getNone(); V.ErasableImplicitDef = true; @@ -2263,7 +2287,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { // // This adds ssub1 to the set of valid lanes in %src: // - // %src:ssub1<def> = FOO + // %src:ssub1 = FOO // // This leaves only ssub1 valid, making any other lanes undef: // @@ -2352,7 +2376,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { if (OtherV.ErasableImplicitDef && DefMI && DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) { DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def - << " extends into BB#" << DefMI->getParent()->getNumber() + << " extends into " << printMBBReference(*DefMI->getParent()) << ", keeping it.\n"); OtherV.ErasableImplicitDef = false; } @@ -2401,9 +2425,9 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { // // 1 %dst:ssub0 = FOO <-- OtherVNI // 2 %src = BAR <-- VNI - // 3 %dst:ssub1 = COPY %src<kill> <-- Eliminate this copy. - // 4 BAZ %dst<kill> - // 5 QUUX %src<kill> + // 3 %dst:ssub1 = COPY killed %src <-- Eliminate this copy. + // 4 BAZ killed %dst + // 5 QUUX killed %src // // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace // handles this complex value mapping. 
@@ -2413,7 +2437,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { // If the other live range is killed by DefMI and the live ranges are still // overlapping, it must be because we're looking at an early clobber def: // - // %dst<def,early-clobber> = ASM %src<kill> + // %dst<def,early-clobber> = ASM killed %src // // In this case, it is illegal to merge the two live ranges since the early // clobber def would clobber %src before it was read. @@ -2463,9 +2487,9 @@ void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) { assert(V.OtherVNI && "OtherVNI not assigned, can't merge."); assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion"); Assignments[ValNo] = Other.Assignments[V.OtherVNI->id]; - DEBUG(dbgs() << "\t\tmerge " << PrintReg(Reg) << ':' << ValNo << '@' + DEBUG(dbgs() << "\t\tmerge " << printReg(Reg) << ':' << ValNo << '@' << LR.getValNumInfo(ValNo)->def << " into " - << PrintReg(Other.Reg) << ':' << V.OtherVNI->id << '@' + << printReg(Other.Reg) << ':' << V.OtherVNI->id << '@' << V.OtherVNI->def << " --> @" << NewVNInfo[Assignments[ValNo]]->def << '\n'); break; @@ -2493,7 +2517,7 @@ bool JoinVals::mapValues(JoinVals &Other) { for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { computeAssignment(i, Other); if (Vals[i].Resolution == CR_Impossible) { - DEBUG(dbgs() << "\t\tinterference at " << PrintReg(Reg) << ':' << i + DEBUG(dbgs() << "\t\tinterference at " << printReg(Reg) << ':' << i << '@' << LR.getValNumInfo(i)->def << '\n'); return false; } @@ -2503,7 +2527,7 @@ bool JoinVals::mapValues(JoinVals &Other) { bool JoinVals:: taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other, - SmallVectorImpl<std::pair<SlotIndex, LaneBitmask> > &TaintExtent) { + SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent) { VNInfo *VNI = LR.getValNumInfo(ValNo); MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def); SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB); @@ -2516,11 +2540,11 @@ 
taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other, // lanes escape the block. SlotIndex End = OtherI->end; if (End >= MBBEnd) { - DEBUG(dbgs() << "\t\ttaints global " << PrintReg(Other.Reg) << ':' + DEBUG(dbgs() << "\t\ttaints global " << printReg(Other.Reg) << ':' << OtherI->valno->id << '@' << OtherI->start << '\n'); return false; } - DEBUG(dbgs() << "\t\ttaints local " << PrintReg(Other.Reg) << ':' + DEBUG(dbgs() << "\t\ttaints local " << printReg(Other.Reg) << ':' << OtherI->valno->id << '@' << OtherI->start << " to " << End << '\n'); // A dead def is not a problem. @@ -2560,10 +2584,10 @@ bool JoinVals::usesLanes(const MachineInstr &MI, unsigned Reg, unsigned SubIdx, bool JoinVals::resolveConflicts(JoinVals &Other) { for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { Val &V = Vals[i]; - assert (V.Resolution != CR_Impossible && "Unresolvable conflict"); + assert(V.Resolution != CR_Impossible && "Unresolvable conflict"); if (V.Resolution != CR_Unresolved) continue; - DEBUG(dbgs() << "\t\tconflict at " << PrintReg(Reg) << ':' << i + DEBUG(dbgs() << "\t\tconflict at " << printReg(Reg) << ':' << i << '@' << LR.getValNumInfo(i)->def << '\n'); if (SubRangeJoin) return false; @@ -2598,7 +2622,7 @@ bool JoinVals::resolveConflicts(JoinVals &Other) { Indexes->getInstructionFromIndex(TaintExtent.front().first); assert(LastMI && "Range must end at a proper instruction"); unsigned TaintNum = 0; - for (;;) { + while (true) { assert(MI != MBB->end() && "Bad LastMI"); if (usesLanes(*MI, Other.Reg, Other.SubIdx, TaintedLanes)) { DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI); @@ -2658,13 +2682,13 @@ void JoinVals::pruneValues(JoinVals &Other, if (!Def.isBlock()) { if (changeInstrs) { // Remove <def,read-undef> flags. This def is now a partial redef. - // Also remove <def,dead> flags since the joined live range will + // Also remove dead flags since the joined live range will // continue past this instruction. 
for (MachineOperand &MO : Indexes->getInstructionFromIndex(Def)->operands()) { if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) { - if (MO.getSubReg() != 0) - MO.setIsUndef(EraseImpDef); + if (MO.getSubReg() != 0 && MO.isUndef() && !EraseImpDef) + MO.setIsUndef(false); MO.setIsDead(false); } } @@ -2674,7 +2698,7 @@ void JoinVals::pruneValues(JoinVals &Other, if (!EraseImpDef) EndPoints.push_back(Def); } - DEBUG(dbgs() << "\t\tpruned " << PrintReg(Other.Reg) << " at " << Def + DEBUG(dbgs() << "\t\tpruned " << printReg(Other.Reg) << " at " << Def << ": " << Other.LR << '\n'); break; } @@ -2686,7 +2710,7 @@ void JoinVals::pruneValues(JoinVals &Other, // computeAssignment(), the value that was originally copied could have // been replaced. LIS->pruneValue(LR, Def, &EndPoints); - DEBUG(dbgs() << "\t\tpruned all of " << PrintReg(Reg) << " at " + DEBUG(dbgs() << "\t\tpruned all of " << printReg(Reg) << " at " << Def << ": " << LR << '\n'); } break; @@ -2994,7 +3018,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { R.LaneMask = Mask; } } - DEBUG(dbgs() << "\t\tLHST = " << PrintReg(CP.getDstReg()) + DEBUG(dbgs() << "\t\tLHST = " << printReg(CP.getDstReg()) << ' ' << LHS << '\n'); // Determine lanemasks of RHS in the coalesced register and merge subranges. @@ -3068,6 +3092,7 @@ bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) { } namespace { + /// Information concerning MBB coalescing priority. struct MBBPriorityInfo { MachineBasicBlock *MBB; @@ -3077,7 +3102,8 @@ struct MBBPriorityInfo { MBBPriorityInfo(MachineBasicBlock *mbb, unsigned depth, bool issplit) : MBB(mbb), Depth(depth), IsSplit(issplit) {} }; -} + +} // end anonymous namespace /// C-style comparator that sorts first based on the loop depth of the basic /// block (the unsigned), and then on the MBB number. @@ -3194,7 +3220,7 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const { continue; // Check that OtherReg interfere with DstReg. 
if (LIS->getInterval(OtherReg).overlaps(DstLI)) { - DEBUG(dbgs() << "Apply terminal rule for: " << PrintReg(DstReg) << '\n'); + DEBUG(dbgs() << "Apply terminal rule for: " << printReg(DstReg) << '\n'); return true; } } @@ -3281,7 +3307,7 @@ void RegisterCoalescer::joinAllIntervals() { array_pod_sort(MBBs.begin(), MBBs.end(), compareMBBPriority); // Coalesce intervals in MBB priority order. - unsigned CurrDepth = UINT_MAX; + unsigned CurrDepth = std::numeric_limits<unsigned>::max(); for (unsigned i = 0, e = MBBs.size(); i != e; ++i) { // Try coalescing the collected local copies for deeper loops. if (JoinGlobalCopies && MBBs[i].Depth < CurrDepth) { @@ -3308,7 +3334,6 @@ void RegisterCoalescer::releaseMemory() { bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { MF = &fn; MRI = &fn.getRegInfo(); - TM = &fn.getTarget(); const TargetSubtargetInfo &STI = fn.getSubtarget(); TRI = STI.getRegisterInfo(); TII = STI.getInstrInfo(); @@ -3349,7 +3374,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { if (MRI->reg_nodbg_empty(Reg)) continue; if (MRI->recomputeRegClass(Reg)) { - DEBUG(dbgs() << PrintReg(Reg) << " inflated to " + DEBUG(dbgs() << printReg(Reg) << " inflated to " << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n'); ++NumInflated; diff --git a/lib/CodeGen/RegisterCoalescer.h b/lib/CodeGen/RegisterCoalescer.h index 04067a1427af..1a46f6d053e6 100644 --- a/lib/CodeGen/RegisterCoalescer.h +++ b/lib/CodeGen/RegisterCoalescer.h @@ -1,4 +1,4 @@ -//===-- RegisterCoalescer.h - Register Coalescing Interface -----*- C++ -*-===// +//===- RegisterCoalescer.h - Register Coalescing Interface ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -17,10 +17,9 @@ namespace llvm { - class MachineInstr; - class TargetRegisterInfo; - class TargetRegisterClass; - class TargetInstrInfo; +class MachineInstr; +class TargetRegisterClass; +class TargetRegisterInfo; /// A helper class for register coalescers. 
When deciding if /// two registers can be coalesced, CoalescerPair can determine if a copy @@ -30,43 +29,40 @@ namespace llvm { /// The register that will be left after coalescing. It can be a /// virtual or physical register. - unsigned DstReg; + unsigned DstReg = 0; /// The virtual register that will be coalesced into dstReg. - unsigned SrcReg; + unsigned SrcReg = 0; /// The sub-register index of the old DstReg in the new coalesced register. - unsigned DstIdx; + unsigned DstIdx = 0; /// The sub-register index of the old SrcReg in the new coalesced register. - unsigned SrcIdx; + unsigned SrcIdx = 0; /// True when the original copy was a partial subregister copy. - bool Partial; + bool Partial = false; /// True when both regs are virtual and newRC is constrained. - bool CrossClass; + bool CrossClass = false; /// True when DstReg and SrcReg are reversed from the original /// copy instruction. - bool Flipped; + bool Flipped = false; /// The register class of the coalesced register, or NULL if DstReg /// is a physreg. This register class may be a super-register of both /// SrcReg and DstReg. - const TargetRegisterClass *NewRC; + const TargetRegisterClass *NewRC = nullptr; public: - CoalescerPair(const TargetRegisterInfo &tri) - : TRI(tri), DstReg(0), SrcReg(0), DstIdx(0), SrcIdx(0), - Partial(false), CrossClass(false), Flipped(false), NewRC(nullptr) {} + CoalescerPair(const TargetRegisterInfo &tri) : TRI(tri) {} /// Create a CoalescerPair representing a virtreg-to-physreg copy. /// No need to call setRegisters(). CoalescerPair(unsigned VirtReg, unsigned PhysReg, const TargetRegisterInfo &tri) - : TRI(tri), DstReg(PhysReg), SrcReg(VirtReg), DstIdx(0), SrcIdx(0), - Partial(false), CrossClass(false), Flipped(false), NewRC(nullptr) {} + : TRI(tri), DstReg(PhysReg), SrcReg(VirtReg) {} /// Set registers to match the copy instruction MI. Return /// false if MI is not a coalescable copy instruction. 
@@ -111,6 +107,7 @@ namespace llvm { /// Return the register class of the coalesced register. const TargetRegisterClass *getNewRC() const { return NewRC; } }; -} // End llvm namespace -#endif +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_REGISTERCOALESCER_H diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp index 88e0a3b58940..9ac810c7c723 100644 --- a/lib/CodeGen/RegisterPressure.cpp +++ b/lib/CodeGen/RegisterPressure.cpp @@ -17,7 +17,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" @@ -26,14 +26,14 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -97,7 +97,7 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) const { dumpRegSetPressure(MaxSetPressure, TRI); dbgs() << "Live In: "; for (const RegisterMaskPair &P : LiveInRegs) { - dbgs() << PrintVRegOrUnit(P.RegUnit, TRI); + dbgs() << printVRegOrUnit(P.RegUnit, TRI); if (!P.LaneMask.all()) dbgs() << ':' << PrintLaneMask(P.LaneMask); dbgs() << ' '; @@ -105,7 +105,7 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) const { dbgs() << '\n'; dbgs() << "Live Out: "; for (const RegisterMaskPair &P : LiveOutRegs) { - dbgs() << PrintVRegOrUnit(P.RegUnit, TRI); + 
dbgs() << printVRegOrUnit(P.RegUnit, TRI); if (!P.LaneMask.all()) dbgs() << ':' << PrintLaneMask(P.LaneMask); dbgs() << ' '; diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index fc5105aadbff..97967124add6 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -16,30 +16,33 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/RegisterScavenging.h" - +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/MC/MCRegisterInfo.h" -#include "llvm/PassSupport.h" +#include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cassert> #include <iterator> #include <limits> #include <string> +#include <utility> using namespace llvm; @@ -210,7 +213,7 @@ void RegScavenger::forward() { continue; if (!isRegUsed(Reg)) { // Check if it's partial live: e.g. - // D0 = insert_subreg D0<undef>, S0 + // D0 = insert_subreg undef D0, S0 // ... D0 // The problem is the insert_subreg could be eliminated. The use of // D0 is using a partially undef value. 
This is not *incorrect* since @@ -285,8 +288,8 @@ bool RegScavenger::isRegUsed(unsigned Reg, bool includeReserved) const { unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const { for (unsigned Reg : *RC) { if (!isRegUsed(Reg)) { - DEBUG(dbgs() << "Scavenger found unused reg: " << TRI->getName(Reg) << - "\n"); + DEBUG(dbgs() << "Scavenger found unused reg: " << printReg(Reg, TRI) + << "\n"); return Reg; } } @@ -460,7 +463,7 @@ RegScavenger::spill(unsigned Reg, const TargetRegisterClass &RC, int SPAdj, MachineBasicBlock::iterator &UseMI) { // Find an available scavenging slot with size and alignment matching // the requirements of the class RC. - const MachineFunction &MF = *Before->getParent()->getParent(); + const MachineFunction &MF = *Before->getMF(); const MachineFrameInfo &MFI = MF.getFrameInfo(); unsigned NeedSize = TRI->getSpillSize(RC); unsigned NeedAlign = TRI->getSpillAlignment(RC); @@ -533,7 +536,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, MachineBasicBlock::iterator I, int SPAdj) { MachineInstr &MI = *I; - const MachineFunction &MF = *MI.getParent()->getParent(); + const MachineFunction &MF = *MI.getMF(); // Consider all allocatable registers in the register class initially BitVector Candidates = TRI->getAllocatableSet(MF, RC); @@ -558,15 +561,15 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, // If we found an unused register there is no reason to spill it. 
if (!isRegUsed(SReg)) { - DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n"); + DEBUG(dbgs() << "Scavenged register: " << printReg(SReg, TRI) << "\n"); return SReg; } ScavengedInfo &Scavenged = spill(SReg, *RC, SPAdj, I, UseMI); Scavenged.Restore = &*std::prev(UseMI); - DEBUG(dbgs() << "Scavenged register (with spill): " << TRI->getName(SReg) << - "\n"); + DEBUG(dbgs() << "Scavenged register (with spill): " << printReg(SReg, TRI) + << "\n"); return SReg; } @@ -595,10 +598,10 @@ unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC, ScavengedInfo &Scavenged = spill(Reg, RC, SPAdj, SpillBefore, ReloadBefore); Scavenged.Restore = &*std::prev(SpillBefore); LiveUnits.removeReg(Reg); - DEBUG(dbgs() << "Scavenged register with spill: " << PrintReg(Reg, TRI) - << " until " << *SpillBefore); + DEBUG(dbgs() << "Scavenged register with spill: " << printReg(Reg, TRI) + << " until " << *SpillBefore); } else { - DEBUG(dbgs() << "Scavenged free register: " << PrintReg(Reg, TRI) << '\n'); + DEBUG(dbgs() << "Scavenged free register: " << printReg(Reg, TRI) << '\n'); } return Reg; } @@ -769,13 +772,16 @@ void llvm::scavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger &RS) { } namespace { + /// This class runs register scavenging independ of the PrologEpilogInserter. /// This is used in for testing. 
class ScavengerTest : public MachineFunctionPass { public: static char ID; + ScavengerTest() : MachineFunctionPass(ID) {} - bool runOnMachineFunction(MachineFunction &MF) { + + bool runOnMachineFunction(MachineFunction &MF) override { const TargetSubtargetInfo &STI = MF.getSubtarget(); const TargetFrameLowering &TFL = *STI.getFrameLowering(); @@ -792,9 +798,10 @@ public: return true; } }; -char ScavengerTest::ID; } // end anonymous namespace +char ScavengerTest::ID; + INITIALIZE_PASS(ScavengerTest, "scavenger-test", "Scavenge virtual registers inside basic blocks", false, false) diff --git a/lib/CodeGen/RegisterUsageInfo.cpp b/lib/CodeGen/RegisterUsageInfo.cpp index 30757f070cad..4e42deb406e1 100644 --- a/lib/CodeGen/RegisterUsageInfo.cpp +++ b/lib/CodeGen/RegisterUsageInfo.cpp @@ -12,17 +12,17 @@ /// //===----------------------------------------------------------------------===// -#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/RegisterUsageInfo.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -97,7 +97,7 @@ void PhysicalRegisterUsageInfo::print(raw_ostream &OS, const Module *M) const { for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) { if (MachineOperand::clobbersPhysReg(&(FPRMPair->second[0]), PReg)) - OS << TRI->getName(PReg) << " "; + OS << printReg(PReg, TRI) << " "; } OS << "\n"; } diff --git a/lib/CodeGen/RenameIndependentSubregs.cpp b/lib/CodeGen/RenameIndependentSubregs.cpp index bd5ecbd28f29..1e1f36a35ecc 100644 --- a/lib/CodeGen/RenameIndependentSubregs.cpp 
+++ b/lib/CodeGen/RenameIndependentSubregs.cpp @@ -10,32 +10,32 @@ /// Rename independent subregisters looks for virtual registers with /// independently used subregisters and renames them to new virtual registers. /// Example: In the following: -/// %vreg0:sub0<read-undef> = ... -/// %vreg0:sub1 = ... -/// use %vreg0:sub0 -/// %vreg0:sub0 = ... -/// use %vreg0:sub0 -/// use %vreg0:sub1 +/// %0:sub0<read-undef> = ... +/// %0:sub1 = ... +/// use %0:sub0 +/// %0:sub0 = ... +/// use %0:sub0 +/// use %0:sub1 /// sub0 and sub1 are never used together, and we have two independent sub0 /// definitions. This pass will rename to: -/// %vreg0:sub0<read-undef> = ... -/// %vreg1:sub1<read-undef> = ... -/// use %vreg1:sub1 -/// %vreg2:sub1<read-undef> = ... -/// use %vreg2:sub1 -/// use %vreg0:sub0 +/// %0:sub0<read-undef> = ... +/// %1:sub1<read-undef> = ... +/// use %1:sub1 +/// %2:sub1<read-undef> = ... +/// use %2:sub1 +/// use %0:sub0 // //===----------------------------------------------------------------------===// #include "LiveRangeUtils.h" #include "PHIEliminationUtils.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" using namespace llvm; @@ -134,15 +134,15 @@ bool RenameIndependentSubregs::renameComponents(LiveInterval &LI) const { const TargetRegisterClass *RegClass = MRI->getRegClass(Reg); SmallVector<LiveInterval*, 4> Intervals; Intervals.push_back(&LI); - DEBUG(dbgs() << PrintReg(Reg) << ": Found " << Classes.getNumClasses() + DEBUG(dbgs() << printReg(Reg) << ": Found " << Classes.getNumClasses() << " equivalence classes.\n"); - DEBUG(dbgs() << PrintReg(Reg) << ": Splitting into newly created:"); + DEBUG(dbgs() << printReg(Reg) << 
": Splitting into newly created:"); for (unsigned I = 1, NumClasses = Classes.getNumClasses(); I < NumClasses; ++I) { unsigned NewVReg = MRI->createVirtualRegister(RegClass); LiveInterval &NewLI = LIS->createEmptyInterval(NewVReg); Intervals.push_back(&NewLI); - DEBUG(dbgs() << ' ' << PrintReg(NewVReg)); + DEBUG(dbgs() << ' ' << printReg(NewVReg)); } DEBUG(dbgs() << '\n'); diff --git a/lib/CodeGen/ResetMachineFunctionPass.cpp b/lib/CodeGen/ResetMachineFunctionPass.cpp index 01b3db43b283..f1885aa74285 100644 --- a/lib/CodeGen/ResetMachineFunctionPass.cpp +++ b/lib/CodeGen/ResetMachineFunctionPass.cpp @@ -51,7 +51,7 @@ namespace { ++NumFunctionsReset; MF.reset(); if (EmitFallbackDiag) { - const Function &F = *MF.getFunction(); + const Function &F = MF.getFunction(); DiagnosticInfoISelFallback DiagFallback(F); F.getContext().diagnose(DiagFallback); } diff --git a/lib/CodeGen/SafeStack.cpp b/lib/CodeGen/SafeStack.cpp index 8584a9b7c897..51233be521be 100644 --- a/lib/CodeGen/SafeStack.cpp +++ b/lib/CodeGen/SafeStack.cpp @@ -1,4 +1,4 @@ -//===-- SafeStack.cpp - Safe Stack Insertion ------------------------------===// +//===- SafeStack.cpp - Safe Stack Insertion -------------------------------===// // // The LLVM Compiler Infrastructure // @@ -17,37 +17,56 @@ #include "SafeStackColoring.h" #include "SafeStackLayout.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/Triple.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/CodeGen/Passes.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/Argument.h" 
+#include "llvm/IR/Attributes.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" #include "llvm/Pass.h" -#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Format.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_os_ostream.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/ModuleUtils.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <string> +#include <utility> using namespace llvm; using namespace llvm::safestack; @@ -255,16 +274,16 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) { assert(V == UI.get()); switch (I->getOpcode()) { - case Instruction::Load: { + case Instruction::Load: if (!IsAccessSafe(UI, DL.getTypeStoreSize(I->getType()), AllocaPtr, AllocaSize)) return false; break; - } + case Instruction::VAArg: // "va-arg" from a pointer is safe. break; - case Instruction::Store: { + case Instruction::Store: if (V == I->getOperand(0)) { // Stored the pointer - conservatively assume it may be unsafe. 
DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr @@ -276,11 +295,10 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) { AllocaPtr, AllocaSize)) return false; break; - } - case Instruction::Ret: { + + case Instruction::Ret: // Information leak. return false; - } case Instruction::Call: case Instruction::Invoke: { @@ -372,7 +390,7 @@ void SafeStack::findInsts(Function &F, StackRestorePoints.push_back(LP); } else if (auto II = dyn_cast<IntrinsicInst>(&I)) { if (II->getIntrinsicID() == Intrinsic::gcroot) - llvm::report_fatal_error( + report_fatal_error( "gcroot intrinsic not compatible with safestack attribute"); } } @@ -540,7 +558,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( // Replace alloc with the new location. replaceDbgDeclare(Arg, BasePointer, BasePointer->getNextNode(), DIB, - /*Deref=*/false, -Offset); + DIExpression::NoDeref, -Offset, DIExpression::NoDeref); Arg->replaceAllUsesWith(NewArg); IRB.SetInsertPoint(cast<Instruction>(NewArg)->getNextNode()); IRB.CreateMemCpy(Off, Arg, Size, Arg->getParamAlignment()); @@ -555,7 +573,8 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( if (Size == 0) Size = 1; // Don't create zero-sized stack objects. - replaceDbgDeclareForAlloca(AI, BasePointer, DIB, /*Deref=*/false, -Offset); + replaceDbgDeclareForAlloca(AI, BasePointer, DIB, DIExpression::NoDeref, + -Offset, DIExpression::NoDeref); replaceDbgValueForAlloca(AI, BasePointer, DIB, -Offset); // Replace uses of the alloca with the new location. 
@@ -645,7 +664,8 @@ void SafeStack::moveDynamicAllocasToUnsafeStack( if (AI->hasName() && isa<Instruction>(NewAI)) NewAI->takeName(AI); - replaceDbgDeclareForAlloca(AI, NewAI, DIB, /*Deref=*/false); + replaceDbgDeclareForAlloca(AI, NewAI, DIB, DIExpression::NoDeref, 0, + DIExpression::NoDeref); AI->replaceAllUsesWith(NewAI); AI->eraseFromParent(); } @@ -764,11 +784,12 @@ bool SafeStack::run() { } class SafeStackLegacyPass : public FunctionPass { - const TargetMachine *TM; + const TargetMachine *TM = nullptr; public: static char ID; // Pass identification, replacement for typeid.. - SafeStackLegacyPass() : FunctionPass(ID), TM(nullptr) { + + SafeStackLegacyPass() : FunctionPass(ID) { initializeSafeStackLegacyPassPass(*PassRegistry::getPassRegistry()); } @@ -817,9 +838,10 @@ public: } }; -} // anonymous namespace +} // end anonymous namespace char SafeStackLegacyPass::ID = 0; + INITIALIZE_PASS_BEGIN(SafeStackLegacyPass, DEBUG_TYPE, "Safe Stack instrumentation pass", false, false) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) diff --git a/lib/CodeGen/SafeStackColoring.cpp b/lib/CodeGen/SafeStackColoring.cpp index 21f2fa497233..072e6e090e1e 100644 --- a/lib/CodeGen/SafeStackColoring.cpp +++ b/lib/CodeGen/SafeStackColoring.cpp @@ -1,4 +1,4 @@ -//===-- SafeStackColoring.cpp - SafeStack frame coloring -------*- C++ -*--===// +//===- SafeStackColoring.cpp - SafeStack frame coloring -------------------===// // // The LLVM Compiler Infrastructure // @@ -8,12 +8,25 @@ //===----------------------------------------------------------------------===// #include "SafeStackColoring.h" - +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/User.h" +#include "llvm/Support/Casting.h" 
+#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <tuple> +#include <utility> using namespace llvm; using namespace llvm::safestack; diff --git a/lib/CodeGen/SafeStackColoring.h b/lib/CodeGen/SafeStackColoring.h index 08b179ccb7f1..902e63ebeb7e 100644 --- a/lib/CodeGen/SafeStackColoring.h +++ b/lib/CodeGen/SafeStackColoring.h @@ -1,4 +1,4 @@ -//===-- SafeStackColoring.h - SafeStack frame coloring ---------*- C++ -*--===// +//===- SafeStackColoring.h - SafeStack frame coloring ----------*- C++ -*--===// // // The LLVM Compiler Infrastructure // @@ -10,16 +10,23 @@ #ifndef LLVM_LIB_CODEGEN_SAFESTACKCOLORING_H #define LLVM_LIB_CODEGEN_SAFESTACKCOLORING_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/IR/Function.h" -#include "llvm/Support/raw_os_ostream.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <utility> namespace llvm { -class AllocaInst; + +class BasicBlock; +class Function; +class Instruction; namespace safestack { + /// Compute live ranges of allocas. /// Live ranges are represented as sets of "interesting" instructions, which are /// defined as instructions that may start or end an alloca's lifetime. These @@ -35,10 +42,13 @@ class StackColoring { struct BlockLifetimeInfo { /// Which slots BEGINs in each basic block. BitVector Begin; + /// Which slots ENDs in each basic block. BitVector End; + /// Which slots are marked as LIVE_IN, coming into each basic block. BitVector LiveIn; + /// Which slots are marked as LIVE_OUT, coming out of each basic block. BitVector LiveOut; }; @@ -48,11 +58,14 @@ public: /// live. 
struct LiveRange { BitVector bv; + void SetMaximum(int size) { bv.resize(size); } void AddRange(unsigned start, unsigned end) { bv.set(start, end); } + bool Overlaps(const LiveRange &Other) const { return bv.anyCommon(Other.bv); } + void Join(const LiveRange &Other) { bv |= Other.bv; } }; @@ -60,13 +73,15 @@ private: Function &F; /// Maps active slots (per bit) for each basic block. - typedef DenseMap<BasicBlock *, BlockLifetimeInfo> LivenessMap; + using LivenessMap = DenseMap<BasicBlock *, BlockLifetimeInfo>; LivenessMap BlockLiveness; /// Number of interesting instructions. - int NumInst; + int NumInst = -1; + /// Numeric ids for interesting instructions. DenseMap<Instruction *, unsigned> InstructionNumbering; + /// A range [Start, End) of instruction ids for each basic block. /// Instructions inside each BB have monotonic and consecutive ids. DenseMap<const BasicBlock *, std::pair<unsigned, unsigned>> BlockInstRange; @@ -74,6 +89,7 @@ private: ArrayRef<AllocaInst *> Allocas; unsigned NumAllocas; DenseMap<AllocaInst *, unsigned> AllocaNumbering; + /// LiveRange for allocas. 
SmallVector<LiveRange, 8> LiveRanges; @@ -101,7 +117,7 @@ private: public: StackColoring(Function &F, ArrayRef<AllocaInst *> Allocas) - : F(F), NumInst(-1), Allocas(Allocas), NumAllocas(Allocas.size()) {} + : F(F), Allocas(Allocas), NumAllocas(Allocas.size()) {} void run(); void removeAllMarkers(); @@ -143,7 +159,8 @@ static inline raw_ostream &operator<<(raw_ostream &OS, return OS << R.bv; } -} // namespace safestack -} // namespace llvm +} // end namespace safestack + +} // end namespace llvm #endif // LLVM_LIB_CODEGEN_SAFESTACKCOLORING_H diff --git a/lib/CodeGen/SafeStackLayout.cpp b/lib/CodeGen/SafeStackLayout.cpp index 7d4dbd13abf4..b1759359e46f 100644 --- a/lib/CodeGen/SafeStackLayout.cpp +++ b/lib/CodeGen/SafeStackLayout.cpp @@ -1,4 +1,4 @@ -//===-- SafeStackLayout.cpp - SafeStack frame layout -----------*- C++ -*--===// +//===- SafeStackLayout.cpp - SafeStack frame layout -----------------------===// // // The LLVM Compiler Infrastructure // @@ -8,9 +8,15 @@ //===----------------------------------------------------------------------===// #include "SafeStackLayout.h" - -#include "llvm/IR/Instructions.h" +#include "SafeStackColoring.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> using namespace llvm; using namespace llvm::safestack; diff --git a/lib/CodeGen/SafeStackLayout.h b/lib/CodeGen/SafeStackLayout.h index 313ed21c8869..7c1292f251f7 100644 --- a/lib/CodeGen/SafeStackLayout.h +++ b/lib/CodeGen/SafeStackLayout.h @@ -1,4 +1,4 @@ -//===-- SafeStackLayout.h - SafeStack frame layout -------------*- C++ -*--===// +//===- SafeStackLayout.h - SafeStack frame layout --------------*- C++ -*--===// // // The LLVM Compiler Infrastructure // @@ -11,8 +11,14 @@ #define LLVM_LIB_CODEGEN_SAFESTACKLAYOUT_H #include "SafeStackColoring.h" +#include 
"llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" namespace llvm { + +class raw_ostream; +class Value; + namespace safestack { /// Compute the layout of an unsafe stack frame. @@ -23,10 +29,12 @@ class StackLayout { unsigned Start; unsigned End; StackColoring::LiveRange Range; + StackRegion(unsigned Start, unsigned End, const StackColoring::LiveRange &Range) : Start(Start), End(End), Range(Range) {} }; + /// The list of current stack regions, sorted by StackRegion::Start. SmallVector<StackRegion, 16> Regions; @@ -35,6 +43,7 @@ class StackLayout { unsigned Size, Alignment; StackColoring::LiveRange Range; }; + SmallVector<StackObject, 8> StackObjects; DenseMap<const Value *, unsigned> ObjectOffsets; @@ -43,6 +52,7 @@ class StackLayout { public: StackLayout(unsigned StackAlignment) : MaxAlignment(StackAlignment) {} + /// Add an object to the stack frame. Value pointer is opaque and used as a /// handle to retrieve the object's offset in the frame later. void addObject(const Value *V, unsigned Size, unsigned Alignment, @@ -59,10 +69,12 @@ public: /// Returns the alignment of the frame. 
unsigned getFrameAlignment() { return MaxAlignment; } + void print(raw_ostream &OS); }; -} // namespace safestack -} // namespace llvm +} // end namespace safestack + +} // end namespace llvm #endif // LLVM_LIB_CODEGEN_SAFESTACKLAYOUT_H diff --git a/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp index 07b43a82ca99..cef413f9d410 100644 --- a/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp +++ b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp @@ -1,5 +1,5 @@ -//=== ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ===// -//=== instrinsics ===// +//===- ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ----===// +// instrinsics // // The LLVM Compiler Infrastructure // @@ -14,9 +14,26 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/Twine.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include <algorithm> +#include <cassert> using namespace llvm; @@ -25,13 +42,15 @@ using namespace llvm; namespace { class ScalarizeMaskedMemIntrin : public FunctionPass { - const TargetTransformInfo *TTI; + const TargetTransformInfo *TTI = nullptr; public: static char ID; // Pass identification, replacement for typeid - explicit ScalarizeMaskedMemIntrin() : FunctionPass(ID), TTI(nullptr) { + + explicit ScalarizeMaskedMemIntrin() : FunctionPass(ID) { 
initializeScalarizeMaskedMemIntrinPass(*PassRegistry::getPassRegistry()); } + bool runOnFunction(Function &F) override; StringRef getPassName() const override { @@ -46,9 +65,11 @@ private: bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT); bool optimizeCallInst(CallInst *CI, bool &ModifiedDT); }; -} // namespace + +} // end anonymous namespace char ScalarizeMaskedMemIntrin::ID = 0; + INITIALIZE_PASS(ScalarizeMaskedMemIntrin, DEBUG_TYPE, "Scalarize unsupported masked memory intrinsics", false, false) @@ -156,7 +177,6 @@ static void scalarizeMaskedLoad(CallInst *CI) { Value *PrevPhi = UndefVal; for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - // Fill the "else" block, created in the previous iteration // // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ] @@ -288,7 +308,6 @@ static void scalarizeMaskedStore(CallInst *CI) { } for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - // Fill the "else" block, created in the previous iteration // // %mask_1 = extractelement <16 x i1> %mask, i32 Idx @@ -408,7 +427,6 @@ static void scalarizeMaskedGather(CallInst *CI) { Value *PrevPhi = UndefVal; for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - // Fill the "else" block, created in the previous iteration // // %Mask1 = extractelement <16 x i1> %Mask, i32 1 @@ -610,13 +628,12 @@ bool ScalarizeMaskedMemIntrin::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) { bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { - IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI); if (II) { switch (II->getIntrinsicID()) { default: break; - case Intrinsic::masked_load: { + case Intrinsic::masked_load: // Scalarize unsupported vector masked load if (!TTI->isLegalMaskedLoad(CI->getType())) { scalarizeMaskedLoad(CI); @@ -624,24 +641,21 @@ bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI, return true; } return false; - } - case Intrinsic::masked_store: { + case Intrinsic::masked_store: if 
(!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType())) { scalarizeMaskedStore(CI); ModifiedDT = true; return true; } return false; - } - case Intrinsic::masked_gather: { + case Intrinsic::masked_gather: if (!TTI->isLegalMaskedGather(CI->getType())) { scalarizeMaskedGather(CI); ModifiedDT = true; return true; } return false; - } - case Intrinsic::masked_scatter: { + case Intrinsic::masked_scatter: if (!TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType())) { scalarizeMaskedScatter(CI); ModifiedDT = true; @@ -649,7 +663,6 @@ bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI, } return false; } - } } return false; diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 5e95f760aaa2..0635e8f41ee7 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -19,13 +19,13 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cassert> #include <iterator> @@ -80,7 +80,7 @@ raw_ostream &SDep::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { case Data: OS << " Latency=" << getLatency(); if (TRI && isAssignedRegDep()) - OS << " Reg=" << PrintReg(getReg(), TRI); + OS << " Reg=" << printReg(getReg(), TRI); break; case Anti: case Output: diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 99baa07390eb..9249fa84b38b 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -21,7 +21,7 @@ #include "llvm/ADT/iterator_range.h" 
#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -36,6 +36,8 @@ #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleDFS.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instruction.h" @@ -52,8 +54,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cassert> #include <iterator> @@ -114,16 +114,18 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), RemoveKillFlags(RemoveKillFlags), UnknownValue(UndefValue::get( - Type::getVoidTy(mf.getFunction()->getContext()))) { + Type::getVoidTy(mf.getFunction().getContext()))) { DbgValues.clear(); const TargetSubtargetInfo &ST = mf.getSubtarget(); SchedModel.init(ST.getSchedModel(), &ST, TII); } -/// If this machine instr has memory reference information and it can be tracked -/// to a normal reference to a known object, return the Value for that object. -static void getUnderlyingObjectsForInstr(const MachineInstr *MI, +/// If this machine instr has memory reference information and it can be +/// tracked to a normal reference to a known object, return the Value +/// for that object. This function returns false the memory location is +/// unknown or may alias anything. 
+static bool getUnderlyingObjectsForInstr(const MachineInstr *MI, const MachineFrameInfo &MFI, UnderlyingObjectsVector &Objects, const DataLayout &DL) { @@ -151,7 +153,8 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI, Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias)); } else if (const Value *V = MMO->getValue()) { SmallVector<Value *, 4> Objs; - getUnderlyingObjectsForCodeGen(V, Objs, DL); + if (!getUnderlyingObjectsForCodeGen(V, Objs, DL)) + return false; for (Value *V : Objs) { assert(isIdentifiedObject(V)); @@ -163,8 +166,12 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI, return true; }; - if (!allMMOsOkay()) + if (!allMMOsOkay()) { Objects.clear(); + return false; + } + + return true; } void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) { @@ -769,7 +776,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, if (PDiffs != nullptr) PDiffs->addInstruction(SU->NodeNum, RegOpers, MRI); - RPTracker->recedeSkipDebugValues(); + if (RPTracker->getPos() == RegionEnd || &*RPTracker->getPos() != &MI) + RPTracker->recedeSkipDebugValues(); assert(&*RPTracker->getPos() == &MI && "RPTracker in sync"); RPTracker->recede(RegOpers); } @@ -860,13 +868,13 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // Find the underlying objects for MI. The Objs vector is either // empty, or filled with the Values of memory locations which this - // SU depends on. An empty vector means the memory location is - // unknown, and may alias anything. + // SU depends on. UnderlyingObjectsVector Objs; - getUnderlyingObjectsForInstr(&MI, MFI, Objs, MF.getDataLayout()); + bool ObjsFound = getUnderlyingObjectsForInstr(&MI, MFI, Objs, + MF.getDataLayout()); if (MI.mayStore()) { - if (Objs.empty()) { + if (!ObjsFound) { // An unknown store depends on all stores and loads. 
addChainDependencies(SU, Stores); addChainDependencies(SU, NonAliasStores); @@ -901,7 +909,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, addChainDependencies(SU, Stores, UnknownValue); } } else { // SU is a load. - if (Objs.empty()) { + if (!ObjsFound) { // An unknown load depends on all stores. addChainDependencies(SU, Stores); addChainDependencies(SU, NonAliasStores); @@ -1036,7 +1044,7 @@ static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs, } void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) { - DEBUG(dbgs() << "Fixup kills for BB#" << MBB.getNumber() << '\n'); + DEBUG(dbgs() << "Fixup kills for " << printMBBReference(MBB) << '\n'); LiveRegs.init(*TRI); LiveRegs.addLiveOuts(MBB); @@ -1348,7 +1356,7 @@ static bool hasDataSucc(const SUnit *SU) { /// search from this root. void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) { if (!IsBottomUp) - llvm_unreachable("Top-down ILP metric is unimplemnted"); + llvm_unreachable("Top-down ILP metric is unimplemented"); SchedDFSImpl Impl(*this); for (const SUnit &SU : SUnits) { diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp index bb6a45996f63..37c4a470bd0a 100644 --- a/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -14,14 +14,12 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/Constants.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include <fstream> using namespace llvm; namespace llvm { diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp index b3d83d5313af..b789e2d9c52c 100644 --- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp 
+++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -15,12 +15,12 @@ #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include <cassert> using namespace llvm; @@ -32,6 +32,7 @@ ScoreboardHazardRecognizer::ScoreboardHazardRecognizer( const char *ParentDebugType) : ScheduleHazardRecognizer(), DebugType(ParentDebugType), ItinData(II), DAG(SchedDAG) { + (void)DebugType; // Determine the maximum depth of any itinerary. This determines the depth of // the scoreboard. We always make the scoreboard at least 1 cycle deep to // avoid dealing with the boundary condition. diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt index ae9c5adb0397..fd1e5e2cfc56 100644 --- a/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -24,7 +24,7 @@ add_llvm_library(LLVMSelectionDAG SelectionDAGTargetInfo.cpp StatepointLowering.cpp TargetLowering.cpp - + DEPENDS intrinsics_gen ) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 432c86dd6f1e..f97732c1c49d 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1,4 +1,4 @@ -//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===// +//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===// // // The LLVM Compiler Infrastructure // @@ -16,32 +16,64 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include 
"llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/CodeGen/DAGCombine.h" +#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> +#include <cassert> +#include <cstdint> +#include <functional> +#include <iterator> +#include <string> +#include <tuple> +#include <utility> +#include <vector> + using namespace llvm; #define DEBUG_TYPE "dagcombine" @@ -53,43 +85,41 @@ 
STATISTIC(OpsNarrowed , "Number of load/op/store narrowed"); STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int"); STATISTIC(SlicedLoads, "Number of load sliced"); -namespace { - static cl::opt<bool> - CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, - cl::desc("Enable DAG combiner's use of IR alias analysis")); +static cl::opt<bool> +CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, + cl::desc("Enable DAG combiner's use of IR alias analysis")); - static cl::opt<bool> - UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), - cl::desc("Enable DAG combiner's use of TBAA")); +static cl::opt<bool> +UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), + cl::desc("Enable DAG combiner's use of TBAA")); #ifndef NDEBUG - static cl::opt<std::string> - CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, - cl::desc("Only use DAG-combiner alias analysis in this" - " function")); +static cl::opt<std::string> +CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, + cl::desc("Only use DAG-combiner alias analysis in this" + " function")); #endif - /// Hidden option to stress test load slicing, i.e., when this option - /// is enabled, load slicing bypasses most of its profitability guards. - static cl::opt<bool> - StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, - cl::desc("Bypass the profitability model of load " - "slicing"), - cl::init(false)); +/// Hidden option to stress test load slicing, i.e., when this option +/// is enabled, load slicing bypasses most of its profitability guards. 
+static cl::opt<bool> +StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, + cl::desc("Bypass the profitability model of load slicing"), + cl::init(false)); - static cl::opt<bool> - MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), - cl::desc("DAG combiner may split indexing from loads")); +static cl::opt<bool> + MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), + cl::desc("DAG combiner may split indexing from loads")); -//------------------------------ DAGCombiner ---------------------------------// +namespace { class DAGCombiner { SelectionDAG &DAG; const TargetLowering &TLI; CombineLevel Level; CodeGenOpt::Level OptLevel; - bool LegalOperations; - bool LegalTypes; + bool LegalOperations = false; + bool LegalTypes = false; bool ForCodeSize; /// \brief Worklist of all of the nodes that need to be simplified. @@ -128,6 +158,19 @@ namespace { SDValue visit(SDNode *N); public: + DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL) + : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), + OptLevel(OL), AA(AA) { + ForCodeSize = DAG.getMachineFunction().getFunction().optForSize(); + + MaximumLegalStoreInBits = 0; + for (MVT VT : MVT::all_valuetypes()) + if (EVT(VT).isSimple() && VT != MVT::Other && + TLI.isTypeLegal(EVT(VT)) && + VT.getSizeInBits() >= MaximumLegalStoreInBits) + MaximumLegalStoreInBits = VT.getSizeInBits(); + } + /// Add to the worklist making sure its instance is at the back (next to be /// processed.) 
void AddToWorklist(SDNode *N) { @@ -285,7 +328,7 @@ namespace { SDValue visitSIGN_EXTEND(SDNode *N); SDValue visitZERO_EXTEND(SDNode *N); SDValue visitANY_EXTEND(SDNode *N); - SDValue visitAssertZext(SDNode *N); + SDValue visitAssertExt(SDNode *N); SDValue visitSIGN_EXTEND_INREG(SDNode *N); SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N); SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N); @@ -348,6 +391,7 @@ namespace { SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt); SDValue foldSelectOfConstants(SDNode *N); + SDValue foldVSelectOfConstants(SDNode *N); SDValue foldBinOpIntoSelect(SDNode *BO); bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N); @@ -371,6 +415,7 @@ namespace { SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); SDValue CombineExtLoad(SDNode *N); SDValue combineRepeatedFPDivisors(SDNode *N); + SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex); SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); SDValue BuildSDIV(SDNode *N); SDValue BuildSDIVPow2(SDNode *N); @@ -400,14 +445,11 @@ namespace { SDValue reduceBuildVecExtToExtBuildVec(SDNode *N); SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N); SDValue reduceBuildVecToShuffle(SDNode *N); - SDValue reduceBuildVecToTrunc(SDNode *N); SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N, ArrayRef<int> VectorMask, SDValue VecIn1, SDValue VecIn2, unsigned LeftIdx); SDValue matchVSelectOpSizesWithSetCC(SDNode *N); - SDValue GetDemandedBits(SDValue V, const APInt &Mask); - /// Walk up chain skipping non-aliasing memory nodes, /// looking for aliasing nodes and adding them to the Aliases vector. void GatherAllAliases(SDNode *N, SDValue OriginalChain, @@ -434,12 +476,14 @@ namespace { /// Holds a pointer to an LSBaseSDNode as well as information on where it /// is located in a sequence of memory operations connected by a chain. 
struct MemOpLink { - MemOpLink(LSBaseSDNode *N, int64_t Offset) - : MemNode(N), OffsetFromBase(Offset) {} // Ptr to the mem node. LSBaseSDNode *MemNode; + // Offset from the base ptr. int64_t OffsetFromBase; + + MemOpLink(LSBaseSDNode *N, int64_t Offset) + : MemNode(N), OffsetFromBase(Offset) {} }; /// This is a helper function for visitMUL to check the profitability @@ -450,38 +494,49 @@ namespace { SDValue &AddNode, SDValue &ConstNode); - /// This is a helper function for visitAND and visitZERO_EXTEND. Returns /// true if the (and (load x) c) pattern matches an extload. ExtVT returns - /// the type of the loaded value to be extended. LoadedVT returns the type - /// of the original loaded value. NarrowLoad returns whether the load would - /// need to be narrowed in order to match. + /// the type of the loaded value to be extended. bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, - EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT, - bool &NarrowLoad); + EVT LoadResultTy, EVT &ExtVT); + + /// Helper function to calculate whether the given Load can have its + /// width reduced to ExtVT. + bool isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType, + EVT &ExtVT, unsigned ShAmt = 0); + + /// Used by BackwardsPropagateMask to find suitable loads. + bool SearchForAndLoads(SDNode *N, SmallPtrSetImpl<LoadSDNode*> &Loads, + SmallPtrSetImpl<SDNode*> &NodeWithConsts, + ConstantSDNode *Mask, SDNode *&UncombinedNode); + /// Attempt to propagate a given AND node back to load leaves so that they + /// can be combined into narrow loads. + bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG); /// Helper function for MergeConsecutiveStores which merges the /// component store chains. SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores); - /// This is a helper function for MergeConsecutiveStores. 
When the source - /// elements of the consecutive stores are all constants or all extracted - /// vector elements, try to merge them into one larger store. - /// \return True if a merged store was created. + /// This is a helper function for MergeConsecutiveStores. When the + /// source elements of the consecutive stores are all constants or + /// all extracted vector elements, try to merge them into one + /// larger store introducing bitcasts if necessary. \return True + /// if a merged store was created. bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores, bool IsConstantSrc, bool UseVector, bool UseTrunc); - /// This is a helper function for MergeConsecutiveStores. - /// Stores that may be merged are placed in StoreNodes. + /// This is a helper function for MergeConsecutiveStores. Stores + /// that potentially may be merged with St are placed in + /// StoreNodes. void getStoreMergeCandidates(StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes); /// Helper function for MergeConsecutiveStores. Checks if - /// Candidate stores have indirect dependency through their - /// operands. \return True if safe to merge + /// candidate stores have indirect dependency through their + /// operands. \return True if safe to merge. 
bool checkMergeStoreCandidatesForDependencies( SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores); @@ -500,19 +555,6 @@ namespace { SDValue distributeTruncateThroughAnd(SDNode *N); public: - DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL) - : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), - OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(AA) { - ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize(); - - MaximumLegalStoreInBits = 0; - for (MVT VT : MVT::all_valuetypes()) - if (EVT(VT).isSimple() && VT != MVT::Other && - TLI.isTypeLegal(EVT(VT)) && - VT.getSizeInBits() >= MaximumLegalStoreInBits) - MaximumLegalStoreInBits = VT.getSizeInBits(); - } - /// Runs the dag combiner on all nodes in the work list void Run(CombineLevel AtLevel); @@ -541,14 +583,12 @@ namespace { return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); } }; -} - -namespace { /// This class is a DAGUpdateListener that removes any deleted /// nodes from the worklist. class WorklistRemover : public SelectionDAG::DAGUpdateListener { DAGCombiner &DC; + public: explicit WorklistRemover(DAGCombiner &dc) : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {} @@ -557,7 +597,8 @@ public: DC.removeFromWorklist(N); } }; -} + +} // end anonymous namespace //===----------------------------------------------------------------------===// // TargetLowering::DAGCombinerInfo implementation @@ -577,7 +618,6 @@ CombineTo(SDNode *N, SDValue Res, bool AddTo) { return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo); } - SDValue TargetLowering::DAGCombinerInfo:: CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) { return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo); @@ -873,6 +913,56 @@ static bool isAnyConstantBuildVector(const SDNode *N) { ISD::isBuildVectorOfConstantFPSDNodes(N); } +// Attempt to match a unary predicate against a scalar/splat constant or +// every element of a constant BUILD_VECTOR. 
+static bool matchUnaryPredicate(SDValue Op, + std::function<bool(ConstantSDNode *)> Match) { + if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) + return Match(Cst); + + if (ISD::BUILD_VECTOR != Op.getOpcode()) + return false; + + EVT SVT = Op.getValueType().getScalarType(); + for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { + auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i)); + if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst)) + return false; + } + return true; +} + +// Attempt to match a binary predicate against a pair of scalar/splat constants +// or every element of a pair of constant BUILD_VECTORs. +static bool matchBinaryPredicate( + SDValue LHS, SDValue RHS, + std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match) { + if (LHS.getValueType() != RHS.getValueType()) + return false; + + if (auto *LHSCst = dyn_cast<ConstantSDNode>(LHS)) + if (auto *RHSCst = dyn_cast<ConstantSDNode>(RHS)) + return Match(LHSCst, RHSCst); + + if (ISD::BUILD_VECTOR != LHS.getOpcode() || + ISD::BUILD_VECTOR != RHS.getOpcode()) + return false; + + EVT SVT = LHS.getValueType().getScalarType(); + for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) { + auto *LHSCst = dyn_cast<ConstantSDNode>(LHS.getOperand(i)); + auto *RHSCst = dyn_cast<ConstantSDNode>(RHS.getOperand(i)); + if (!LHSCst || !RHSCst) + return false; + if (LHSCst->getValueType(0) != SVT || + LHSCst->getValueType(0) != RHSCst->getValueType(0)) + return false; + if (!Match(LHSCst, RHSCst)) + return false; + } + return true; +} + SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, SDValue N1) { EVT VT = N0.getValueType(); @@ -1123,10 +1213,10 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { Replace0 &= !N0->hasOneUse(); Replace1 &= (N0 != N1) && !N1->hasOneUse(); - // Combine Op here so it is presreved past replacements. + // Combine Op here so it is preserved past replacements. 
CombineTo(Op.getNode(), RV); - // If operands have a use ordering, make sur we deal with + // If operands have a use ordering, make sure we deal with // predecessor first. if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) { std::swap(N0, N1); @@ -1473,7 +1563,8 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N); case ISD::ANY_EXTEND: return visitANY_EXTEND(N); - case ISD::AssertZext: return visitAssertZext(N); + case ISD::AssertSext: + case ISD::AssertZext: return visitAssertExt(N); case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N); case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N); @@ -1572,15 +1663,15 @@ SDValue DAGCombiner::combine(SDNode *N) { } } - // If N is a commutative binary node, try commuting it to enable more - // sdisel CSE. + // If N is a commutative binary node, try eliminate it if the commuted + // version is already present in the DAG. if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) && N->getNumValues() == 1) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); // Constant operands are canonicalized to RHS. - if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) { + if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) { SDValue Ops[] = {N1, N0}; SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops, N->getFlags()); @@ -1632,7 +1723,6 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { // Check each of the operands. for (const SDValue &Op : TF->op_values()) { - switch (Op.getOpcode()) { case ISD::EntryToken: // Entry tokens don't need to be added to the list. 
They are @@ -1907,6 +1997,15 @@ SDValue DAGCombiner::visitADD(SDNode *N) { return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not); } } + + // Undo the add -> or combine to merge constant offsets from a frame index. + if (N0.getOpcode() == ISD::OR && + isa<FrameIndexSDNode>(N0.getOperand(0)) && + isa<ConstantSDNode>(N0.getOperand(1)) && + DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) { + SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1)); + return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0); + } } if (SDValue NewSel = foldBinOpIntoSelect(N)) @@ -2064,7 +2163,8 @@ SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) } // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry) - if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) + if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) && + N1.getResNo() == 0) return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(), N0, N1.getOperand(0), N1.getOperand(2)); @@ -2537,6 +2637,12 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0); N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1); + assert((!N0IsConst || + ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) && + "Splat APInt should be element width"); + assert((!N1IsConst || + ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) && + "Splat APInt should be element width"); } else { N0IsConst = isa<ConstantSDNode>(N0); if (N0IsConst) { @@ -2562,12 +2668,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // fold (mul x, 0) -> 0 if (N1IsConst && ConstValue1.isNullValue()) return N1; - // We require a splat of the entire scalar bit width for non-contiguous - // bit patterns. 
- bool IsFullSplat = - ConstValue1.getBitWidth() == VT.getScalarSizeInBits(); // fold (mul x, 1) -> x - if (N1IsConst && ConstValue1.isOneValue() && IsFullSplat) + if (N1IsConst && ConstValue1.isOneValue()) return N0; if (SDValue NewSel = foldBinOpIntoSelect(N)) @@ -2580,16 +2682,20 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { DAG.getConstant(0, DL, VT), N0); } // fold (mul x, (1 << c)) -> x << c - if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() && - IsFullSplat) { + if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) && + DAG.isKnownToBeAPowerOfTwo(N1) && + (!VT.isVector() || Level <= AfterLegalizeVectorOps)) { SDLoc DL(N); - return DAG.getNode(ISD::SHL, DL, VT, N0, - DAG.getConstant(ConstValue1.logBase2(), DL, - getShiftAmountTy(N0.getValueType()))); + SDValue LogBase2 = BuildLogBase2(N1, DL); + AddToWorklist(LogBase2.getNode()); + + EVT ShiftVT = getShiftAmountTy(N0.getValueType()); + SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT); + AddToWorklist(Trunc.getNode()); + return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc); } // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c - if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() && - IsFullSplat) { + if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) { unsigned Log2Val = (-ConstValue1).logBase2(); SDLoc DL(N); // FIXME: If the input is something that is easily negated (e.g. a @@ -2835,7 +2941,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { // If integer divide is expensive and we satisfy the requirements, emit an // alternate sequence. Targets may check function attributes for size/speed // trade-offs. 
- AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes(); + AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr)) if (SDValue Op = BuildSDIV(N)) return Op; @@ -2906,7 +3012,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { } // fold (udiv x, c) -> alternate - AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes(); + AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr)) if (SDValue Op = BuildUDIV(N)) return Op; @@ -2965,7 +3071,7 @@ SDValue DAGCombiner::visitREM(SDNode *N) { } } - AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes(); + AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); // If X/C can be simplified by the division-by-constant logic, lower // X%C to the equivalent of X-X/C*C. @@ -3003,19 +3109,26 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); + if (VT.isVector()) { + // fold (mulhs x, 0) -> 0 + if (ISD::isBuildVectorAllZeros(N1.getNode())) + return N1; + if (ISD::isBuildVectorAllZeros(N0.getNode())) + return N0; + } + // fold (mulhs x, 0) -> 0 if (isNullConstant(N1)) return N1; // fold (mulhs x, 1) -> (sra x, size(x)-1) - if (isOneConstant(N1)) { - SDLoc DL(N); + if (isOneConstant(N1)) return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0, DAG.getConstant(N0.getValueSizeInBits() - 1, DL, getShiftAmountTy(N0.getValueType()))); - } + // fold (mulhs x, undef) -> 0 if (N0.isUndef() || N1.isUndef()) - return DAG.getConstant(0, SDLoc(N), VT); + return DAG.getConstant(0, DL, VT); // If the type twice as wide is legal, transform the mulhs to a wider multiply // plus a shift. 
@@ -3043,6 +3156,14 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); + if (VT.isVector()) { + // fold (mulhu x, 0) -> 0 + if (ISD::isBuildVectorAllZeros(N1.getNode())) + return N1; + if (ISD::isBuildVectorAllZeros(N0.getNode())) + return N0; + } + // fold (mulhu x, 0) -> 0 if (isNullConstant(N1)) return N1; @@ -3216,7 +3337,7 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; - // fold (add c1, c2) -> c1+c2 + // fold operation with constant operands. ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); if (N0C && N1C) @@ -3599,22 +3720,20 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) { } bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, - EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT, - bool &NarrowLoad) { - uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits(); - - if (ActiveBits == 0 || !AndC->getAPIntValue().isMask(ActiveBits)) + EVT LoadResultTy, EVT &ExtVT) { + if (!AndC->getAPIntValue().isMask()) return false; + unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes(); + ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); - LoadedVT = LoadN->getMemoryVT(); + EVT LoadedVT = LoadN->getMemoryVT(); if (ExtVT == LoadedVT && (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) { // ZEXTLOAD will match without needing to change the size of the value being // loaded. - NarrowLoad = false; return true; } @@ -3634,10 +3753,185 @@ bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT)) return false; - NarrowLoad = true; return true; } +bool DAGCombiner::isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType, + EVT &ExtVT, unsigned ShAmt) { + // Don't transform one with multiple uses, this would require adding a new + // load. 
+ if (!SDValue(LoadN, 0).hasOneUse()) + return false; + + if (LegalOperations && + !TLI.isLoadExtLegal(ExtType, LoadN->getValueType(0), ExtVT)) + return false; + + // Do not generate loads of non-round integer types since these can + // be expensive (and would be wrong if the type is not byte sized). + if (!ExtVT.isRound()) + return false; + + // Don't change the width of a volatile load. + if (LoadN->isVolatile()) + return false; + + // Verify that we are actually reducing a load width here. + if (LoadN->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits()) + return false; + + // For the transform to be legal, the load must produce only two values + // (the value loaded and the chain). Don't transform a pre-increment + // load, for example, which produces an extra value. Otherwise the + // transformation is not equivalent, and the downstream logic to replace + // uses gets things wrong. + if (LoadN->getNumValues() > 2) + return false; + + // If the load that we're shrinking is an extload and we're not just + // discarding the extension we can't simply shrink the load. Bail. + // TODO: It would be possible to merge the extensions in some cases. + if (LoadN->getExtensionType() != ISD::NON_EXTLOAD && + LoadN->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt) + return false; + + if (!TLI.shouldReduceLoadWidth(LoadN, ExtType, ExtVT)) + return false; + + // It's not possible to generate a constant of extended or untyped type. + EVT PtrType = LoadN->getOperand(1).getValueType(); + if (PtrType == MVT::Untyped || PtrType.isExtended()) + return false; + + return true; +} + +bool DAGCombiner::SearchForAndLoads(SDNode *N, + SmallPtrSetImpl<LoadSDNode*> &Loads, + SmallPtrSetImpl<SDNode*> &NodesWithConsts, + ConstantSDNode *Mask, + SDNode *&NodeToMask) { + // Recursively search for the operands, looking for loads which can be + // narrowed. 
+ for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) { + SDValue Op = N->getOperand(i); + + if (Op.getValueType().isVector()) + return false; + + // Some constants may need fixing up later if they are too large. + if (auto *C = dyn_cast<ConstantSDNode>(Op)) { + if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) && + (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue()) + NodesWithConsts.insert(N); + continue; + } + + if (!Op.hasOneUse()) + return false; + + switch(Op.getOpcode()) { + case ISD::LOAD: { + auto *Load = cast<LoadSDNode>(Op); + EVT ExtVT; + if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) && + isLegalNarrowLoad(Load, ISD::ZEXTLOAD, ExtVT)) { + // Only add this load if we can make it more narrow. + if (ExtVT.bitsLT(Load->getMemoryVT())) + Loads.insert(Load); + continue; + } + return false; + } + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + case ISD::AssertZext: { + unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes(); + EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); + EVT VT = Op.getOpcode() == ISD::AssertZext ? + cast<VTSDNode>(Op.getOperand(1))->getVT() : + Op.getOperand(0).getValueType(); + + // We can accept extending nodes if the mask is wider or an equal + // width to the original type. + if (ExtVT.bitsGE(VT)) + continue; + break; + } + case ISD::OR: + case ISD::XOR: + case ISD::AND: + if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask, + NodeToMask)) + return false; + continue; + } + + // Allow one node which will masked along with any loads found. + if (NodeToMask) + return false; + NodeToMask = Op.getNode(); + } + return true; +} + +bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) { + auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (!Mask) + return false; + + if (!Mask->getAPIntValue().isMask()) + return false; + + // No need to do anything if the and directly uses a load. 
+ if (isa<LoadSDNode>(N->getOperand(0))) + return false; + + SmallPtrSet<LoadSDNode*, 8> Loads; + SmallPtrSet<SDNode*, 2> NodesWithConsts; + SDNode *FixupNode = nullptr; + if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) { + if (Loads.size() == 0) + return false; + + SDValue MaskOp = N->getOperand(1); + + // If it exists, fixup the single node we allow in the tree that needs + // masking. + if (FixupNode) { + SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode), + FixupNode->getValueType(0), + SDValue(FixupNode, 0), MaskOp); + DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And); + DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), + MaskOp); + } + + // Narrow any constants that need it. + for (auto *LogicN : NodesWithConsts) { + auto *C = cast<ConstantSDNode>(LogicN->getOperand(1)); + SDValue And = DAG.getNode(ISD::AND, SDLoc(C), C->getValueType(0), + SDValue(C, 0), MaskOp); + DAG.UpdateNodeOperands(LogicN, LogicN->getOperand(0), And); + } + + // Create narrow loads. + for (auto *Load : Loads) { + SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0), + SDValue(Load, 0), MaskOp); + DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And); + DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp); + SDValue NewLoad = ReduceLoadWidth(And.getNode()); + assert(NewLoad && + "Shouldn't be masking the load if it can't be narrowed"); + CombineTo(Load, NewLoad, NewLoad.getValue(1)); + } + DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode()); + return true; + } + return false; +} + SDValue DAGCombiner::visitAND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -3829,55 +4123,23 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD || (N0.getOpcode() == ISD::ANY_EXTEND && N0.getOperand(0).getOpcode() == ISD::LOAD))) { - bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND; - LoadSDNode *LN0 = HasAnyExt - ? 
cast<LoadSDNode>(N0.getOperand(0)) - : cast<LoadSDNode>(N0); - if (LN0->getExtensionType() != ISD::SEXTLOAD && - LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) { - auto NarrowLoad = false; - EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; - EVT ExtVT, LoadedVT; - if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT, - NarrowLoad)) { - if (!NarrowLoad) { - SDValue NewLoad = - DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, - LN0->getChain(), LN0->getBasePtr(), ExtVT, - LN0->getMemOperand()); - AddToWorklist(N); - CombineTo(LN0, NewLoad, NewLoad.getValue(1)); - return SDValue(N, 0); // Return N so it doesn't get rechecked! - } else { - EVT PtrType = LN0->getOperand(1).getValueType(); - - unsigned Alignment = LN0->getAlignment(); - SDValue NewPtr = LN0->getBasePtr(); - - // For big endian targets, we need to add an offset to the pointer - // to load the correct bytes. For little endian systems, we merely - // need to read fewer bytes from the same pointer. - if (DAG.getDataLayout().isBigEndian()) { - unsigned LVTStoreBytes = LoadedVT.getStoreSize(); - unsigned EVTStoreBytes = ExtVT.getStoreSize(); - unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; - SDLoc DL(LN0); - NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, - NewPtr, DAG.getConstant(PtrOff, DL, PtrType)); - Alignment = MinAlign(Alignment, PtrOff); - } + if (SDValue Res = ReduceLoadWidth(N)) { + LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND + ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0); - AddToWorklist(NewPtr.getNode()); + AddToWorklist(N); + CombineTo(LN0, Res, Res.getValue(1)); + return SDValue(N, 0); + } + } - SDValue Load = DAG.getExtLoad( - ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), NewPtr, - LN0->getPointerInfo(), ExtVT, Alignment, - LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); - AddToWorklist(N); - CombineTo(LN0, Load, Load.getValue(1)); - return SDValue(N, 0); // Return N so it doesn't get rechecked! 
- } - } + if (Level >= AfterLegalizeTypes) { + // Attempt to propagate the AND back up to the leaves which, if they're + // loads, can be combined to narrow loads and the AND node can be removed. + // Perform after legalization so that extend nodes will already be + // combined into the loads. + if (BackwardsPropagateMask(N, DAG)) { + return SDValue(N, 0); } } @@ -3974,7 +4236,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT)) return SDValue(); - // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00) + // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff) bool LookPassAnd0 = false; bool LookPassAnd1 = false; if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL) @@ -4593,20 +4855,6 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, return nullptr; } -// if Left + Right == Sum (constant or constant splat vector) -static bool sumMatchConstant(SDValue Left, SDValue Right, unsigned Sum, - SelectionDAG &DAG, const SDLoc &DL) { - EVT ShiftVT = Left.getValueType(); - if (ShiftVT != Right.getValueType()) return false; - - SDValue ShiftSum = DAG.FoldConstantArithmetic(ISD::ADD, DL, ShiftVT, - Left.getNode(), Right.getNode()); - if (!ShiftSum) return false; - - ConstantSDNode *CSum = isConstOrConstSplat(ShiftSum); - return CSum && CSum->getZExtValue() == Sum; -} - // MatchRotate - Handle an 'or' of two operands. If this is one of the many // idioms for rotate, and if the target supports rotation instructions, generate // a rot[lr]. @@ -4620,6 +4868,16 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT); if (!HasROTL && !HasROTR) return nullptr; + // Check for truncated rotate. 
+ if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE && + LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) { + assert(LHS.getValueType() == RHS.getValueType()); + if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) { + return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), + SDValue(Rot, 0)).getNode(); + } + } + // Match "(X shl/srl V1) & V2" where V2 may not be present. SDValue LHSShift; // The shift. SDValue LHSMask; // AND value if any. @@ -4652,7 +4910,11 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2) - if (sumMatchConstant(LHSShiftAmt, RHSShiftAmt, EltSizeInBits, DAG, DL)) { + auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS, + ConstantSDNode *RHS) { + return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits; + }; + if (matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) { SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt); @@ -4712,20 +4974,22 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { } namespace { + /// Represents known origin of an individual byte in load combine pattern. The /// value of the byte is either constant zero or comes from memory. struct ByteProvider { // For constant zero providers Load is set to nullptr. For memory providers // Load represents the node which loads the byte from memory. // ByteOffset is the offset of the byte in the value produced by the load. 
- LoadSDNode *Load; - unsigned ByteOffset; + LoadSDNode *Load = nullptr; + unsigned ByteOffset = 0; - ByteProvider() : Load(nullptr), ByteOffset(0) {} + ByteProvider() = default; static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) { return ByteProvider(Load, ByteOffset); } + static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); } bool isConstantZero() const { return !Load; } @@ -4740,6 +5004,8 @@ private: : Load(Load), ByteOffset(ByteOffset) {} }; +} // end anonymous namespace + /// Recursively traverses the expression calculating the origin of the requested /// byte of the given value. Returns None if the provider can't be calculated. /// @@ -4751,9 +5017,9 @@ private: /// Because the parts of the expression are not allowed to have more than one /// use this function iterates over trees, not DAGs. So it never visits the same /// node more than once. -const Optional<ByteProvider> calculateByteProvider(SDValue Op, unsigned Index, - unsigned Depth, - bool Root = false) { +static const Optional<ByteProvider> +calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, + bool Root = false) { // Typical i64 by i8 pattern requires recursion up to 8 calls depth if (Depth == 10) return None; @@ -4837,7 +5103,6 @@ const Optional<ByteProvider> calculateByteProvider(SDValue Op, unsigned Index, return None; } -} // namespace /// Match a pattern where a wide type scalar value is loaded by several narrow /// loads and combined by shifts and ors. 
Fold it into a single load or a load @@ -4950,7 +5215,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { Loads.insert(L); } - assert(Loads.size() > 0 && "All the bytes of the value must be loaded from " + assert(!Loads.empty() && "All the bytes of the value must be loaded from " "memory, so there must be at least one load which produces the value"); assert(Base && "Base address of the accessed memory location must be set"); assert(FirstOffset != INT64_MAX && "First byte offset must be set"); @@ -5373,7 +5638,11 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (isNullConstantOrNullSplatConstant(N0)) return N0; // fold (shl x, c >= size(x)) -> undef - if (N1C && N1C->getAPIntValue().uge(OpSizeInBits)) + // NOTE: ALL vector elements must be too big to avoid partial UNDEFs. + auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) { + return Val->getAPIntValue().uge(OpSizeInBits); + }; + if (matchUnaryPredicate(N1, MatchShiftTooBig)) return DAG.getUNDEF(VT); // fold (shl x, 0) -> x if (N1C && N1C->isNullValue()) @@ -5400,20 +5669,29 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { return SDValue(N, 0); // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2)) - if (N1C && N0.getOpcode() == ISD::SHL) { - if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { - SDLoc DL(N); - APInt c1 = N0C1->getAPIntValue(); - APInt c2 = N1C->getAPIntValue(); + if (N0.getOpcode() == ISD::SHL) { + auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS, + ConstantSDNode *RHS) { + APInt c1 = LHS->getAPIntValue(); + APInt c2 = RHS->getAPIntValue(); zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); + return (c1 + c2).uge(OpSizeInBits); + }; + if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange)) + return DAG.getConstant(0, SDLoc(N), VT); - APInt Sum = c1 + c2; - if (Sum.uge(OpSizeInBits)) - return DAG.getConstant(0, DL, VT); - - return DAG.getNode( - ISD::SHL, DL, VT, N0.getOperand(0), - DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType())); + auto 
MatchInRange = [OpSizeInBits](ConstantSDNode *LHS, + ConstantSDNode *RHS) { + APInt c1 = LHS->getAPIntValue(); + APInt c2 = RHS->getAPIntValue(); + zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); + return (c1 + c2).ult(OpSizeInBits); + }; + if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) { + SDLoc DL(N); + EVT ShiftVT = N1.getValueType(); + SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1)); + return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum); } } @@ -5527,16 +5805,18 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { } // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) + // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2) // Variant of version done on multiply, except mul by a power of 2 is turned // into a shift. - if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && + if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) && + N0.getNode()->hasOneUse() && isConstantOrConstantVector(N1, /* No Opaques */ true) && isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) { SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1); SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1); AddToWorklist(Shl0.getNode()); AddToWorklist(Shl1.getNode()); - return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1); + return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1); } // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2) @@ -5579,7 +5859,11 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (N0C && N1C && !N1C->isOpaque()) return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C); // fold (sra x, c >= size(x)) -> undef - if (N1C && N1C->getAPIntValue().uge(OpSizeInBits)) + // NOTE: ALL vector elements must be too big to avoid partial UNDEFs. 
+ auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) { + return Val->getAPIntValue().uge(OpSizeInBits); + }; + if (matchUnaryPredicate(N1, MatchShiftTooBig)) return DAG.getUNDEF(VT); // fold (sra x, 0) -> x if (N1C && N1C->isNullValue()) @@ -5603,20 +5887,31 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { } // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2)) - if (N1C && N0.getOpcode() == ISD::SRA) { - if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { - SDLoc DL(N); - APInt c1 = N0C1->getAPIntValue(); - APInt c2 = N1C->getAPIntValue(); - zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); - - APInt Sum = c1 + c2; - if (Sum.uge(OpSizeInBits)) - Sum = APInt(OpSizeInBits, OpSizeInBits - 1); + if (N0.getOpcode() == ISD::SRA) { + SDLoc DL(N); + EVT ShiftVT = N1.getValueType(); - return DAG.getNode( - ISD::SRA, DL, VT, N0.getOperand(0), - DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType())); + auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS, + ConstantSDNode *RHS) { + APInt c1 = LHS->getAPIntValue(); + APInt c2 = RHS->getAPIntValue(); + zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); + return (c1 + c2).uge(OpSizeInBits); + }; + if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange)) + return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), + DAG.getConstant(OpSizeInBits - 1, DL, ShiftVT)); + + auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS, + ConstantSDNode *RHS) { + APInt c1 = LHS->getAPIntValue(); + APInt c2 = RHS->getAPIntValue(); + zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); + return (c1 + c2).ult(OpSizeInBits); + }; + if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) { + SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1)); + return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), Sum); } } @@ -5647,7 +5942,6 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) && TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) && 
TLI.isTruncateFree(VT, TruncVT)) { - SDLoc DL(N); SDValue Amt = DAG.getConstant(ShiftAmt, DL, getShiftAmountTy(N0.getOperand(0).getValueType())); @@ -5697,7 +5991,6 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (N1C && SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); - // If the sign bit is known to be zero, switch this to a SRL. if (DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1); @@ -5730,7 +6023,11 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (isNullConstantOrNullSplatConstant(N0)) return N0; // fold (srl x, c >= size(x)) -> undef - if (N1C && N1C->getAPIntValue().uge(OpSizeInBits)) + // NOTE: ALL vector elements must be too big to avoid partial UNDEFs. + auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) { + return Val->getAPIntValue().uge(OpSizeInBits); + }; + if (matchUnaryPredicate(N1, MatchShiftTooBig)) return DAG.getUNDEF(VT); // fold (srl x, 0) -> x if (N1C && N1C->isNullValue()) @@ -5745,20 +6042,29 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return DAG.getConstant(0, SDLoc(N), VT); // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2)) - if (N1C && N0.getOpcode() == ISD::SRL) { - if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { - SDLoc DL(N); - APInt c1 = N0C1->getAPIntValue(); - APInt c2 = N1C->getAPIntValue(); + if (N0.getOpcode() == ISD::SRL) { + auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS, + ConstantSDNode *RHS) { + APInt c1 = LHS->getAPIntValue(); + APInt c2 = RHS->getAPIntValue(); zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); + return (c1 + c2).uge(OpSizeInBits); + }; + if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange)) + return DAG.getConstant(0, SDLoc(N), VT); - APInt Sum = c1 + c2; - if (Sum.uge(OpSizeInBits)) - return DAG.getConstant(0, DL, VT); - - return DAG.getNode( - ISD::SRL, DL, VT, N0.getOperand(0), - DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType())); + auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS, + 
ConstantSDNode *RHS) { + APInt c1 = LHS->getAPIntValue(); + APInt c2 = RHS->getAPIntValue(); + zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); + return (c1 + c2).ult(OpSizeInBits); + }; + if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) { + SDLoc DL(N); + EVT ShiftVT = N1.getValueType(); + SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1)); + return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum); } } @@ -6008,7 +6314,6 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) { return SDValue(); } - /// \brief Generate Min/Max node static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, @@ -6096,7 +6401,7 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { // For any constants that differ by 1, we can transform the select into an // extend and add. Use a target hook because some targets may prefer to // transform in the other direction. - if (TLI.convertSelectOfConstantsToMath()) { + if (TLI.convertSelectOfConstantsToMath(VT)) { if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) { // select Cond, C1, C1-1 --> add (zext Cond), C1-1 if (VT != MVT::i1) @@ -6371,7 +6676,6 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { } SDValue DAGCombiner::visitMSCATTER(SDNode *N) { - if (Level >= AfterLegalizeTypes) return SDValue(); @@ -6432,7 +6736,6 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) { } SDValue DAGCombiner::visitMSTORE(SDNode *N) { - if (Level >= AfterLegalizeTypes) return SDValue(); @@ -6447,7 +6750,6 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { // prevents the type legalizer from unrolling SETCC into scalar comparisons // and enables future optimizations (e.g. min/max pattern matching on X86). if (Mask.getOpcode() == ISD::SETCC) { - // Check if any splitting is required. 
if (TLI.getTypeAction(*DAG.getContext(), VT) != TargetLowering::TypeSplitVector) @@ -6504,11 +6806,10 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { } SDValue DAGCombiner::visitMGATHER(SDNode *N) { - if (Level >= AfterLegalizeTypes) return SDValue(); - MaskedGatherSDNode *MGT = dyn_cast<MaskedGatherSDNode>(N); + MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N); SDValue Mask = MGT->getMask(); SDLoc DL(N); @@ -6581,7 +6882,6 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) { } SDValue DAGCombiner::visitMLOAD(SDNode *N) { - if (Level >= AfterLegalizeTypes) return SDValue(); @@ -6593,7 +6893,6 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { // SETCC, then split both nodes and its operands before legalization. This // prevents the type legalizer from unrolling SETCC into scalar comparisons // and enables future optimizations (e.g. min/max pattern matching on X86). - if (Mask.getOpcode() == ISD::SETCC) { EVT VT = N->getValueType(0); @@ -6665,6 +6964,57 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { return SDValue(); } +/// A vector select of 2 constant vectors can be simplified to math/logic to +/// avoid a variable select instruction and possibly avoid constant loads. +SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) { + SDValue Cond = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + EVT VT = N->getValueType(0); + if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 || + !TLI.convertSelectOfConstantsToMath(VT) || + !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) || + !ISD::isBuildVectorOfConstantSDNodes(N2.getNode())) + return SDValue(); + + // Check if we can use the condition value to increment/decrement a single + // constant value. This simplifies a select to an add and removes a constant + // load/materialization from the general case. 
+ bool AllAddOne = true; + bool AllSubOne = true; + unsigned Elts = VT.getVectorNumElements(); + for (unsigned i = 0; i != Elts; ++i) { + SDValue N1Elt = N1.getOperand(i); + SDValue N2Elt = N2.getOperand(i); + if (N1Elt.isUndef() || N2Elt.isUndef()) + continue; + + const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue(); + const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue(); + if (C1 != C2 + 1) + AllAddOne = false; + if (C1 != C2 - 1) + AllSubOne = false; + } + + // Further simplifications for the extra-special cases where the constants are + // all 0 or all -1 should be implemented as folds of these patterns. + SDLoc DL(N); + if (AllAddOne || AllSubOne) { + // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C + // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C + auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND; + SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond); + return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2); + } + + // The general case for select-of-constants: + // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2 + // ...but that only makes sense if a vselect is slower than 2 logic ops, so + // leave that to a machine-specific pass. + return SDValue(); +} + SDValue DAGCombiner::visitVSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -6729,6 +7079,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { return CV; } + if (SDValue V = foldVSelectOfConstants(N)) + return V; + return SDValue(); } @@ -7243,8 +7596,15 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDLoc(N0.getOperand(0)), N0.getOperand(0).getValueType(), ExtLoad); ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND); + bool NoReplaceTruncAnd = !N0.hasOneUse(); bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse(); CombineTo(N, And); + // If N0 has multiple uses, change other uses as well. 
+ if (NoReplaceTruncAnd) { + SDValue TruncAnd = + DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And); + CombineTo(N0.getNode(), TruncAnd); + } if (NoReplaceTrunc) DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); else @@ -7307,7 +7667,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true)) return SCC; - if (!VT.isVector()) { + if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) { EVT SetCCVT = getSetCCResultType(N00VT); // Don't do this transform for i1 because there's a select transform // that would reverse it. @@ -7399,20 +7759,6 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { return DAG.getZExtOrTrunc(Op, SDLoc(N), VT); } - // fold (zext (truncate (load x))) -> (zext (smaller load x)) - // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n))) - if (N0.getOpcode() == ISD::TRUNCATE) { - if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { - SDNode *oye = N0.getOperand(0).getNode(); - if (NarrowLoad.getNode() != N0.getNode()) { - CombineTo(N0.getNode(), NarrowLoad); - // CombineTo deleted the truncate, if needed, but not what's under it. - AddToWorklist(oye); - } - return SDValue(N, 0); // Return N so it doesn't get rechecked! - } - } - // fold (zext (truncate x)) -> (and x, mask) if (N0.getOpcode() == ISD::TRUNCATE) { // fold (zext (truncate (load x))) -> (zext (smaller load x)) @@ -7445,7 +7791,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) { SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT); AddToWorklist(Op.getNode()); - return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType()); + SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType()); + // We may safely transfer the debug info describing the truncate node over + // to the equivalent and operation. 
+ DAG.transferDbgValues(N0, And); + return And; } } @@ -7522,11 +7872,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (!N0.hasOneUse()) { if (N0.getOpcode() == ISD::AND) { auto *AndC = cast<ConstantSDNode>(N0.getOperand(1)); - auto NarrowLoad = false; EVT LoadResultTy = AndC->getValueType(0); - EVT ExtVT, LoadedVT; - if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT, LoadedVT, - NarrowLoad)) + EVT ExtVT; + if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT)) DoXform = false; } if (DoXform) @@ -7547,8 +7895,15 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDLoc(N0.getOperand(0)), N0.getOperand(0).getValueType(), ExtLoad); ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::ZERO_EXTEND); + bool NoReplaceTruncAnd = !N0.hasOneUse(); bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse(); CombineTo(N, And); + // If N0 has multiple uses, change other uses as well. + if (NoReplaceTruncAnd) { + SDValue TruncAnd = + DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And); + CombineTo(N0.getNode(), TruncAnd); + } if (NoReplaceTrunc) DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); else @@ -7604,10 +7959,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // If the desired elements are smaller or larger than the source // elements we can use a matching integer vector type and then // truncate/sign extend. - EVT MatchingElementType = EVT::getIntegerVT( - *DAG.getContext(), N00VT.getScalarSizeInBits()); - EVT MatchingVectorType = EVT::getVectorVT( - *DAG.getContext(), MatchingElementType, N00VT.getVectorNumElements()); + EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger(); SDValue VsetCC = DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0), N0.getOperand(1), N0.getOperand(2)); @@ -7731,7 +8083,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { ISD::ANY_EXTEND); // If the load value is used only by N, replace it via CombineTo N. 
bool NoReplaceTrunc = N0.hasOneUse(); - CombineTo(N, ExtLoad); + CombineTo(N, ExtLoad); if (NoReplaceTrunc) DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); else @@ -7769,13 +8121,16 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // aext(setcc) -> aext(vsetcc) // Only do this before legalize for now. if (VT.isVector() && !LegalOperations) { - EVT N0VT = N0.getOperand(0).getValueType(); - // We know that the # elements of the results is the same as the - // # elements of the compare (and the # elements of the compare result - // for that matter). Check to see that they are the same size. If so, - // we know that the element size of the sext'd result matches the - // element size of the compare operands. - if (VT.getSizeInBits() == N0VT.getSizeInBits()) + EVT N00VT = N0.getOperand(0).getValueType(); + if (getSetCCResultType(N00VT) == N0.getValueType()) + return SDValue(); + + // We know that the # elements of the results is the same as the + // # elements of the compare (and the # elements of the compare result + // for that matter). Check to see that they are the same size. If so, + // we know that the element size of the sext'd result matches the + // element size of the compare operands. 
+ if (VT.getSizeInBits() == N00VT.getSizeInBits()) return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()); @@ -7783,7 +8138,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // elements we can use a matching integer vector type and then // truncate/any extend else { - EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger(); + EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger(); SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0), N0.getOperand(1), @@ -7804,77 +8159,47 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { return SDValue(); } -SDValue DAGCombiner::visitAssertZext(SDNode *N) { +SDValue DAGCombiner::visitAssertExt(SDNode *N) { + unsigned Opcode = N->getOpcode(); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - EVT EVT = cast<VTSDNode>(N1)->getVT(); + EVT AssertVT = cast<VTSDNode>(N1)->getVT(); - // fold (assertzext (assertzext x, vt), vt) -> (assertzext x, vt) - if (N0.getOpcode() == ISD::AssertZext && - EVT == cast<VTSDNode>(N0.getOperand(1))->getVT()) + // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt) + if (N0.getOpcode() == Opcode && + AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT()) return N0; - return SDValue(); -} + if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && + N0.getOperand(0).getOpcode() == Opcode) { + // We have an assert, truncate, assert sandwich. Make one stronger assert + // by asserting on the smallest asserted type to the larger source type. 
+ // This eliminates the later assert: + // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN + // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN + SDValue BigA = N0.getOperand(0); + EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT(); + assert(BigA_AssertVT.bitsLE(N0.getValueType()) && + "Asserting zero/sign-extended bits to a type larger than the " + "truncated destination does not provide information"); -/// See if the specified operand can be simplified with the knowledge that only -/// the bits specified by Mask are used. If so, return the simpler operand, -/// otherwise return a null SDValue. -/// -/// (This exists alongside SimplifyDemandedBits because GetDemandedBits can -/// simplify nodes with multiple uses more aggressively.) -SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { - switch (V.getOpcode()) { - default: break; - case ISD::Constant: { - const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode()); - assert(CV && "Const value should be ConstSDNode."); - const APInt &CVal = CV->getAPIntValue(); - APInt NewVal = CVal & Mask; - if (NewVal != CVal) - return DAG.getConstant(NewVal, SDLoc(V), V.getValueType()); - break; + SDLoc DL(N); + EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT; + SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT); + SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(), + BigA.getOperand(0), MinAssertVTVal); + return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert); } - case ISD::OR: - case ISD::XOR: - // If the LHS or RHS don't contribute bits to the or, drop them. - if (DAG.MaskedValueIsZero(V.getOperand(0), Mask)) - return V.getOperand(1); - if (DAG.MaskedValueIsZero(V.getOperand(1), Mask)) - return V.getOperand(0); - break; - case ISD::SRL: - // Only look at single-use SRLs. 
- if (!V.getNode()->hasOneUse()) - break; - if (ConstantSDNode *RHSC = getAsNonOpaqueConstant(V.getOperand(1))) { - // See if we can recursively simplify the LHS. - unsigned Amt = RHSC->getZExtValue(); - // Watch out for shift count overflow though. - if (Amt >= Mask.getBitWidth()) break; - APInt NewMask = Mask << Amt; - if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask)) - return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(), - SimplifyLHS, V.getOperand(1)); - } - break; - case ISD::AND: { - // X & -1 -> X (ignoring bits which aren't demanded). - ConstantSDNode *AndVal = isConstOrConstSplat(V.getOperand(1)); - if (AndVal && (AndVal->getAPIntValue() & Mask) == Mask) - return V.getOperand(0); - break; - } - } return SDValue(); } /// If the result of a wider load is shifted to right of N bits and then /// truncated to a narrower type and where N is a multiple of number of bits of /// the narrower type, transform it to a narrower load from address + N / num of -/// bits of new type. If the result is to be extended, also fold the extension -/// to form a extending load. +/// bits of new type. Also narrow the load if the result is masked with an AND +/// to effectively produce a smaller type. If the result is to be extended, also +/// fold the extension to form a extending load. SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { unsigned Opc = N->getOpcode(); @@ -7893,28 +8218,40 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { ExtType = ISD::SEXTLOAD; ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT(); } else if (Opc == ISD::SRL) { - // Another special-case: SRL is basically zero-extending a narrower value. + // Another special-case: SRL is basically zero-extending a narrower value, + // or it maybe shifting a higher subword, half or byte into the lowest + // bits. 
ExtType = ISD::ZEXTLOAD; N0 = SDValue(N, 0); - ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1)); - if (!N01) return SDValue(); - ExtVT = EVT::getIntegerVT(*DAG.getContext(), - VT.getSizeInBits() - N01->getZExtValue()); - } - if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT)) - return SDValue(); - unsigned EVTBits = ExtVT.getSizeInBits(); + auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0)); + auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (!N01 || !LN0) + return SDValue(); - // Do not generate loads of non-round integer types since these can - // be expensive (and would be wrong if the type is not byte sized). - if (!ExtVT.isRound()) - return SDValue(); + uint64_t ShiftAmt = N01->getZExtValue(); + uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits(); + if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt) + ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt); + else + ExtVT = EVT::getIntegerVT(*DAG.getContext(), + VT.getSizeInBits() - ShiftAmt); + } else if (Opc == ISD::AND) { + // An AND with a constant mask is the same as a truncate + zero-extend. + auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (!AndC || !AndC->getAPIntValue().isMask()) + return SDValue(); + + unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes(); + ExtType = ISD::ZEXTLOAD; + ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); + } unsigned ShAmt = 0; if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { ShAmt = N01->getZExtValue(); + unsigned EVTBits = ExtVT.getSizeInBits(); // Is the shift amount a multiple of size of VT? if ((ShAmt & (EVTBits-1)) == 0) { N0 = N0.getOperand(0); @@ -7951,42 +8288,12 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { } } - // If we haven't found a load, we can't narrow it. Don't transform one with - // multiple uses, this would require adding a new load. 
- if (!isa<LoadSDNode>(N0) || !N0.hasOneUse()) + // If we haven't found a load, we can't narrow it. + if (!isa<LoadSDNode>(N0)) return SDValue(); - // Don't change the width of a volatile load. LoadSDNode *LN0 = cast<LoadSDNode>(N0); - if (LN0->isVolatile()) - return SDValue(); - - // Verify that we are actually reducing a load width here. - if (LN0->getMemoryVT().getSizeInBits() < EVTBits) - return SDValue(); - - // For the transform to be legal, the load must produce only two values - // (the value loaded and the chain). Don't transform a pre-increment - // load, for example, which produces an extra value. Otherwise the - // transformation is not equivalent, and the downstream logic to replace - // uses gets things wrong. - if (LN0->getNumValues() > 2) - return SDValue(); - - // If the load that we're shrinking is an extload and we're not just - // discarding the extension we can't simply shrink the load. Bail. - // TODO: It would be possible to merge the extensions in some cases. - if (LN0->getExtensionType() != ISD::NON_EXTLOAD && - LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt) - return SDValue(); - - if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT)) - return SDValue(); - - EVT PtrType = N0.getOperand(1).getValueType(); - - if (PtrType == MVT::Untyped || PtrType.isExtended()) - // It's not possible to generate a constant of extended or untyped type. 
+ if (!isLegalNarrowLoad(LN0, ExtType, ExtVT, ShAmt)) return SDValue(); // For big endian targets, we need to adjust the offset to the pointer to @@ -7997,6 +8304,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { ShAmt = LVTStoreBits - EVTStoreBits - ShAmt; } + EVT PtrType = N0.getOperand(1).getValueType(); uint64_t PtrOff = ShAmt / 8; unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); SDLoc DL(LN0); @@ -8130,10 +8438,14 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { } // fold (sext_inreg (extload x)) -> (sextload x) + // If sextload is not supported by target, we can only do the combine when + // load has one use. Doing otherwise can block folding the extload with other + // extends that the target does support. if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && EVT == cast<LoadSDNode>(N0)->getMemoryVT() && - ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() && + N0.hasOneUse()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, @@ -8208,12 +8520,18 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // noop truncate if (N0.getValueType() == N->getValueType(0)) return N0; - // fold (truncate c1) -> c1 - if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) - return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0); + // fold (truncate (truncate x)) -> (truncate x) if (N0.getOpcode() == ISD::TRUNCATE) return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0)); + + // fold (truncate c1) -> c1 + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) { + SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0); + if (C.getNode() != N) + return C; + } + // fold (truncate (ext x)) -> (ext x) or (truncate x) or x if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND || @@ -8245,7 +8563,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) 
{ // we need to be more careful about the vector instructions that we generate. if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) { - EVT VecTy = N0.getOperand(0).getValueType(); EVT ExTy = N0.getValueType(); EVT TrTy = N->getValueType(0); @@ -8311,7 +8628,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() && N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR && N0.getOperand(0).hasOneUse()) { - SDValue BuildVect = N0.getOperand(0); EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType(); EVT TruncVecEltTy = VT.getVectorElementType(); @@ -8340,9 +8656,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // Currently we only perform this optimization on scalars because vectors // may have different active low bits. if (!VT.isVector()) { - if (SDValue Shorter = - GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(), - VT.getSizeInBits()))) + APInt Mask = + APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits()); + if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask)) return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter); } @@ -8413,7 +8729,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // Fold truncate of a bitcast of a vector to an extract of the low vector // element. // - // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, 0 + // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) { SDValue VecSrc = N0.getOperand(0); EVT SrcVT = VecSrc.getValueType(); @@ -8423,8 +8739,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDLoc SL(N); EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); + unsigned Idx = isLE ? 
0 : SrcVT.getVectorNumElements() - 1; return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, - VecSrc, DAG.getConstant(0, SL, IdxVT)); + VecSrc, DAG.getConstant(Idx, SL, IdxVT)); } } @@ -8466,11 +8783,18 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0)); LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1)); + + // A BUILD_PAIR is always having the least significant part in elt 0 and the + // most significant part in elt 1. So when combining into one large load, we + // need to consider the endianness. + if (DAG.getDataLayout().isBigEndian()) + std::swap(LD1, LD2); + if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() || LD1->getAddressSpace() != LD2->getAddressSpace()) return SDValue(); EVT LD1VT = LD1->getValueType(0); - unsigned LD1Bytes = LD1VT.getSizeInBits() / 8; + unsigned LD1Bytes = LD1VT.getStoreSize(); if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() && DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) { unsigned Align = LD1->getAlignment(); @@ -8751,12 +9075,15 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { if (Op.getOpcode() == ISD::BITCAST && Op.getOperand(0).getValueType() == VT) return SDValue(Op.getOperand(0)); - if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) || + if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) || ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) return DAG.getBitcast(VT, Op); return SDValue(); }; + // FIXME: If either input vector is bitcast, try to convert the shuffle to + // the result type of this bitcast. This would eliminate at least one + // bitcast. See the transform in InstCombine. SDValue SV0 = PeekThroughBitcast(N0->getOperand(0)); SDValue SV1 = PeekThroughBitcast(N0->getOperand(1)); if (!(SV0 && SV1)) @@ -8949,7 +9276,6 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { // Always prefer FMAD to FMA for precision. unsigned PreferredFusedOpcode = HasFMAD ? 
ISD::FMAD : ISD::FMA; bool Aggressive = TLI.enableAggressiveFMAFusion(VT); - bool LookThroughFPExt = TLI.isFPExtFree(VT); // Is the node an FMUL and contractable either due to global flags or // SDNodeFlags. @@ -8979,28 +9305,31 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { } // Look through FP_EXTEND nodes to do more combining. - if (LookThroughFPExt) { - // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) - if (N0.getOpcode() == ISD::FP_EXTEND) { - SDValue N00 = N0.getOperand(0); - if (isContractableFMUL(N00)) - return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N00.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N00.getOperand(1)), N1); + + // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (isContractableFMUL(N00) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(1)), N1); } + } - // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x) - // Note: Commutes FADD operands. - if (N1.getOpcode() == ISD::FP_EXTEND) { - SDValue N10 = N1.getOperand(0); - if (isContractableFMUL(N10)) - return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N10.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N10.getOperand(1)), N0); + // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x) + // Note: Commutes FADD operands. 
+ if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (isContractableFMUL(N10) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N10.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N10.getOperand(1)), N0); } } @@ -9036,80 +9365,87 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { N0)); } - if (LookThroughFPExt) { - // fold (fadd (fma x, y, (fpext (fmul u, v))), z) - // -> (fma x, y, (fma (fpext u), (fpext v), z)) - auto FoldFAddFMAFPExtFMul = [&] ( - SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) { - return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y, - DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, U), - DAG.getNode(ISD::FP_EXTEND, SL, VT, V), - Z)); - }; - if (N0.getOpcode() == PreferredFusedOpcode) { - SDValue N02 = N0.getOperand(2); - if (N02.getOpcode() == ISD::FP_EXTEND) { - SDValue N020 = N02.getOperand(0); - if (isContractableFMUL(N020)) - return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1), - N020.getOperand(0), N020.getOperand(1), - N1); + + // fold (fadd (fma x, y, (fpext (fmul u, v))), z) + // -> (fma x, y, (fma (fpext u), (fpext v), z)) + auto FoldFAddFMAFPExtFMul = [&] ( + SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y, + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, U), + DAG.getNode(ISD::FP_EXTEND, SL, VT, V), + Z)); + }; + if (N0.getOpcode() == PreferredFusedOpcode) { + SDValue N02 = N0.getOperand(2); + if (N02.getOpcode() == ISD::FP_EXTEND) { + SDValue N020 = N02.getOperand(0); + if (isContractableFMUL(N020) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) { + return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1), + N020.getOperand(0), N020.getOperand(1), + N1); } } + } - // fold (fadd (fpext (fma x, y, 
(fmul u, v))), z) - // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z)) - // FIXME: This turns two single-precision and one double-precision - // operation into two double-precision operations, which might not be - // interesting for all targets, especially GPUs. - auto FoldFAddFPExtFMAFMul = [&] ( - SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) { - return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, X), - DAG.getNode(ISD::FP_EXTEND, SL, VT, Y), - DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, U), - DAG.getNode(ISD::FP_EXTEND, SL, VT, V), - Z)); - }; - if (N0.getOpcode() == ISD::FP_EXTEND) { - SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == PreferredFusedOpcode) { - SDValue N002 = N00.getOperand(2); - if (isContractableFMUL(N002)) - return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1), - N002.getOperand(0), N002.getOperand(1), - N1); + // fold (fadd (fpext (fma x, y, (fmul u, v))), z) + // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z)) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. 
+ auto FoldFAddFPExtFMAFMul = [&] ( + SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, X), + DAG.getNode(ISD::FP_EXTEND, SL, VT, Y), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, U), + DAG.getNode(ISD::FP_EXTEND, SL, VT, V), + Z)); + }; + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == PreferredFusedOpcode) { + SDValue N002 = N00.getOperand(2); + if (isContractableFMUL(N002) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { + return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1), + N002.getOperand(0), N002.getOperand(1), + N1); } } + } - // fold (fadd x, (fma y, z, (fpext (fmul u, v))) - // -> (fma y, z, (fma (fpext u), (fpext v), x)) - if (N1.getOpcode() == PreferredFusedOpcode) { - SDValue N12 = N1.getOperand(2); - if (N12.getOpcode() == ISD::FP_EXTEND) { - SDValue N120 = N12.getOperand(0); - if (isContractableFMUL(N120)) - return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1), - N120.getOperand(0), N120.getOperand(1), - N0); + // fold (fadd x, (fma y, z, (fpext (fmul u, v))) + // -> (fma y, z, (fma (fpext u), (fpext v), x)) + if (N1.getOpcode() == PreferredFusedOpcode) { + SDValue N12 = N1.getOperand(2); + if (N12.getOpcode() == ISD::FP_EXTEND) { + SDValue N120 = N12.getOperand(0); + if (isContractableFMUL(N120) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) { + return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1), + N120.getOperand(0), N120.getOperand(1), + N0); } } + } - // fold (fadd x, (fpext (fma y, z, (fmul u, v))) - // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x)) - // FIXME: This turns two single-precision and one double-precision - // operation into two double-precision operations, which might not be - // interesting for all targets, especially GPUs. 
- if (N1.getOpcode() == ISD::FP_EXTEND) { - SDValue N10 = N1.getOperand(0); - if (N10.getOpcode() == PreferredFusedOpcode) { - SDValue N102 = N10.getOperand(2); - if (isContractableFMUL(N102)) - return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1), - N102.getOperand(0), N102.getOperand(1), - N0); + // fold (fadd x, (fpext (fma y, z, (fmul u, v))) + // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x)) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. + if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (N10.getOpcode() == PreferredFusedOpcode) { + SDValue N102 = N10.getOperand(2); + if (isContractableFMUL(N102) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) { + return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1), + N102.getOperand(0), N102.getOperand(1), + N0); } } } @@ -9151,7 +9487,6 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { // Always prefer FMAD to FMA for precision. unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; bool Aggressive = TLI.enableAggressiveFMAFusion(VT); - bool LookThroughFPExt = TLI.isFPExtFree(VT); // Is the node an FMUL and contractable either due to global flags or // SDNodeFlags. @@ -9187,79 +9522,83 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { } // Look through FP_EXTEND nodes to do more combining. 
- if (LookThroughFPExt) { - // fold (fsub (fpext (fmul x, y)), z) - // -> (fma (fpext x), (fpext y), (fneg z)) - if (N0.getOpcode() == ISD::FP_EXTEND) { - SDValue N00 = N0.getOperand(0); - if (isContractableFMUL(N00)) - return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N00.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N00.getOperand(1)), - DAG.getNode(ISD::FNEG, SL, VT, N1)); + + // fold (fsub (fpext (fmul x, y)), z) + // -> (fma (fpext x), (fpext y), (fneg z)) + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (isContractableFMUL(N00) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(1)), + DAG.getNode(ISD::FNEG, SL, VT, N1)); } + } - // fold (fsub x, (fpext (fmul y, z))) - // -> (fma (fneg (fpext y)), (fpext z), x) - // Note: Commutes FSUB operands. - if (N1.getOpcode() == ISD::FP_EXTEND) { - SDValue N10 = N1.getOperand(0); - if (isContractableFMUL(N10)) - return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FNEG, SL, VT, + // fold (fsub x, (fpext (fmul y, z))) + // -> (fma (fneg (fpext y)), (fpext z), x) + // Note: Commutes FSUB operands. + if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (isContractableFMUL(N10) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N10.getOperand(0))), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N10.getOperand(1)), + N0); + } + } + + // fold (fsub (fpext (fneg (fmul, x, y))), z) + // -> (fneg (fma (fpext x), (fpext y), z)) + // Note: This could be removed with appropriate canonicalization of the + // input expression into (fneg (fadd (fpext (fmul, x, y)), z). 
However, the + // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent + // from implementing the canonicalization in visitFSUB. + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FNEG) { + SDValue N000 = N00.getOperand(0); + if (isContractableFMUL(N000) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { + return DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, - N10.getOperand(0))), - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N10.getOperand(1)), - N0); - } - - // fold (fsub (fpext (fneg (fmul, x, y))), z) - // -> (fneg (fma (fpext x), (fpext y), z)) - // Note: This could be removed with appropriate canonicalization of the - // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the - // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent - // from implementing the canonicalization in visitFSUB. - if (N0.getOpcode() == ISD::FP_EXTEND) { - SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == ISD::FNEG) { - SDValue N000 = N00.getOperand(0); - if (isContractableFMUL(N000)) { - return DAG.getNode(ISD::FNEG, SL, VT, - DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N000.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N000.getOperand(1)), - N1)); - } + N000.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N000.getOperand(1)), + N1)); } } + } - // fold (fsub (fneg (fpext (fmul, x, y))), z) - // -> (fneg (fma (fpext x)), (fpext y), z) - // Note: This could be removed with appropriate canonicalization of the - // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the - // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent - // from implementing the canonicalization in visitFSUB. 
- if (N0.getOpcode() == ISD::FNEG) { - SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == ISD::FP_EXTEND) { - SDValue N000 = N00.getOperand(0); - if (isContractableFMUL(N000)) { - return DAG.getNode(ISD::FNEG, SL, VT, - DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N000.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N000.getOperand(1)), - N1)); - } + // fold (fsub (fneg (fpext (fmul, x, y))), z) + // -> (fneg (fma (fpext x)), (fpext y), z) + // Note: This could be removed with appropriate canonicalization of the + // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the + // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent + // from implementing the canonicalization in visitFSUB. + if (N0.getOpcode() == ISD::FNEG) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FP_EXTEND) { + SDValue N000 = N00.getOperand(0); + if (isContractableFMUL(N000) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) { + return DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N000.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N000.getOperand(1)), + N1)); } } - } // More folding opportunities when target permits. 
@@ -9298,102 +9637,108 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { N21, N0)); } - if (LookThroughFPExt) { - // fold (fsub (fma x, y, (fpext (fmul u, v))), z) - // -> (fma x, y (fma (fpext u), (fpext v), (fneg z))) - if (N0.getOpcode() == PreferredFusedOpcode) { - SDValue N02 = N0.getOperand(2); - if (N02.getOpcode() == ISD::FP_EXTEND) { - SDValue N020 = N02.getOperand(0); - if (isContractableFMUL(N020)) - return DAG.getNode(PreferredFusedOpcode, SL, VT, - N0.getOperand(0), N0.getOperand(1), - DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N020.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N020.getOperand(1)), - DAG.getNode(ISD::FNEG, SL, VT, - N1))); - } - } - // fold (fsub (fpext (fma x, y, (fmul u, v))), z) - // -> (fma (fpext x), (fpext y), - // (fma (fpext u), (fpext v), (fneg z))) - // FIXME: This turns two single-precision and one double-precision - // operation into two double-precision operations, which might not be - // interesting for all targets, especially GPUs. 
- if (N0.getOpcode() == ISD::FP_EXTEND) { - SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == PreferredFusedOpcode) { - SDValue N002 = N00.getOperand(2); - if (isContractableFMUL(N002)) - return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N00.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N00.getOperand(1)), - DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N002.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N002.getOperand(1)), - DAG.getNode(ISD::FNEG, SL, VT, - N1))); - } - } - - // fold (fsub x, (fma y, z, (fpext (fmul u, v)))) - // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x)) - if (N1.getOpcode() == PreferredFusedOpcode && - N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) { - SDValue N120 = N1.getOperand(2).getOperand(0); - if (isContractableFMUL(N120)) { - SDValue N1200 = N120.getOperand(0); - SDValue N1201 = N120.getOperand(1); + // fold (fsub (fma x, y, (fpext (fmul u, v))), z) + // -> (fma x, y (fma (fpext u), (fpext v), (fneg z))) + if (N0.getOpcode() == PreferredFusedOpcode) { + SDValue N02 = N0.getOperand(2); + if (N02.getOpcode() == ISD::FP_EXTEND) { + SDValue N020 = N02.getOperand(0); + if (isContractableFMUL(N020) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), - N1.getOperand(1), + N0.getOperand(0), N0.getOperand(1), DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FNEG, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, - VT, N1200)), DAG.getNode(ISD::FP_EXTEND, SL, VT, - N1201), - N0)); + N020.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N020.getOperand(1)), + DAG.getNode(ISD::FNEG, SL, VT, + N1))); } } + } - // fold (fsub x, (fpext (fma y, z, (fmul u, v)))) - // -> (fma (fneg (fpext y)), (fpext z), - // (fma (fneg (fpext u)), (fpext v), x)) - // FIXME: This turns two single-precision and one 
double-precision - // operation into two double-precision operations, which might not be - // interesting for all targets, especially GPUs. - if (N1.getOpcode() == ISD::FP_EXTEND && - N1.getOperand(0).getOpcode() == PreferredFusedOpcode) { - SDValue N100 = N1.getOperand(0).getOperand(0); - SDValue N101 = N1.getOperand(0).getOperand(1); - SDValue N102 = N1.getOperand(0).getOperand(2); - if (isContractableFMUL(N102)) { - SDValue N1020 = N102.getOperand(0); - SDValue N1021 = N102.getOperand(1); + // fold (fsub (fpext (fma x, y, (fmul u, v))), z) + // -> (fma (fpext x), (fpext y), + // (fma (fpext u), (fpext v), (fneg z))) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == PreferredFusedOpcode) { + SDValue N002 = N00.getOperand(2); + if (isContractableFMUL(N002) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FNEG, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N100)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, N101), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(1)), DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FNEG, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, - VT, N1020)), DAG.getNode(ISD::FP_EXTEND, SL, VT, - N1021), - N0)); + N002.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N002.getOperand(1)), + DAG.getNode(ISD::FNEG, SL, VT, + N1))); } } } + + // fold (fsub x, (fma y, z, (fpext (fmul u, v)))) + // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x)) + if (N1.getOpcode() == PreferredFusedOpcode && + N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) { + SDValue N120 = N1.getOperand(2).getOperand(0); + if (isContractableFMUL(N120) && + 
TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) { + SDValue N1200 = N120.getOperand(0); + SDValue N1201 = N120.getOperand(1); + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), + N1.getOperand(1), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, + VT, N1200)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N1201), + N0)); + } + } + + // fold (fsub x, (fpext (fma y, z, (fmul u, v)))) + // -> (fma (fneg (fpext y)), (fpext z), + // (fma (fneg (fpext u)), (fpext v), x)) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. + if (N1.getOpcode() == ISD::FP_EXTEND && + N1.getOperand(0).getOpcode() == PreferredFusedOpcode) { + SDValue CvtSrc = N1.getOperand(0); + SDValue N100 = CvtSrc.getOperand(0); + SDValue N101 = CvtSrc.getOperand(1); + SDValue N102 = CvtSrc.getOperand(2); + if (isContractableFMUL(N102) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) { + SDValue N1020 = N102.getOperand(0); + SDValue N1021 = N102.getOperand(1); + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N100)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, N101), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, + VT, N1020)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N1021), + N0)); + } + } } return SDValue(); @@ -9959,6 +10304,14 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { // TODO: The FMA node should have flags that propagate to this node. 
return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg); } + + // fma (fneg x), K, y -> fma x -K, y + if (N0.getOpcode() == ISD::FNEG && + (TLI.isOperationLegal(ISD::ConstantFP, VT) || + (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT)))) { + return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0), + DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2); + } } if (Options.UnsafeFPMath) { @@ -10081,8 +10434,8 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { (!LegalOperations || // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM // backend)... we should handle this gracefully after Legalize. - // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) || - TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) || + // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) || + TLI.isOperationLegal(ISD::ConstantFP, VT) || TLI.isFPImmLegal(Recip, VT))) return DAG.getNode(ISD::FMUL, DL, VT, N0, DAG.getConstantFP(Recip, DL, VT), Flags); @@ -10264,7 +10617,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && // ...but only if the target supports immediate floating-point values (!LegalOperations || - TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) + TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0); // If the input is a legal type, and SINT_TO_FP is not legal on this target, @@ -10282,7 +10635,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 && !VT.isVector() && (!LegalOperations || - TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { + TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) { SDLoc DL(N); SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1), @@ -10296,7 +10649,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() && (!LegalOperations || - 
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { + TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) { SDLoc DL(N); SDValue Ops[] = { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1), @@ -10318,7 +10671,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && // ...but only if the target supports immediate floating-point values (!LegalOperations || - TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) + TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0); // If the input is a legal type, and UINT_TO_FP is not legal on this target, @@ -10333,10 +10686,9 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { // The next optimizations are desirable only if SELECT_CC can be lowered. if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) { // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc) - if (N0.getOpcode() == ISD::SETCC && !VT.isVector() && (!LegalOperations || - TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { + TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) { SDLoc DL(N); SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1), @@ -10557,6 +10909,19 @@ SDValue DAGCombiner::visitFTRUNC(SDNode *N) { if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0); + // fold ftrunc (known rounded int x) -> x + // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is + // likely to be generated to extract integer from a rounded floating value. + switch (N0.getOpcode()) { + default: break; + case ISD::FRINT: + case ISD::FTRUNC: + case ISD::FNEARBYINT: + case ISD::FFLOOR: + case ISD::FCEIL: + return N0; + } + return SDValue(); } @@ -11160,6 +11525,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // Replace the uses of Ptr with uses of the updated base value. DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 
1 : 0)); deleteAndRecombine(Ptr.getNode()); + AddToWorklist(Result.getNode()); return true; } @@ -11445,6 +11811,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { } namespace { + /// \brief Helper structure used to slice a load in smaller loads. /// Basically a slice is obtained from the following sequence: /// Origin = load Ty1, Base @@ -11462,21 +11829,19 @@ struct LoadedSlice { struct Cost { /// Are we optimizing for code size. bool ForCodeSize; + /// Various cost. - unsigned Loads; - unsigned Truncates; - unsigned CrossRegisterBanksCopies; - unsigned ZExts; - unsigned Shift; + unsigned Loads = 0; + unsigned Truncates = 0; + unsigned CrossRegisterBanksCopies = 0; + unsigned ZExts = 0; + unsigned Shift = 0; - Cost(bool ForCodeSize = false) - : ForCodeSize(ForCodeSize), Loads(0), Truncates(0), - CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {} + Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {} /// \brief Get the cost of one isolated slice. Cost(const LoadedSlice &LS, bool ForCodeSize = false) - : ForCodeSize(ForCodeSize), Loads(1), Truncates(0), - CrossRegisterBanksCopies(0), ZExts(0), Shift(0) { + : ForCodeSize(ForCodeSize), Loads(1) { EVT TruncType = LS.Inst->getValueType(0); EVT LoadedType = LS.getLoadedType(); if (TruncType != LoadedType && @@ -11538,13 +11903,17 @@ struct LoadedSlice { bool operator>=(const Cost &RHS) const { return !(*this < RHS); } }; + // The last instruction that represent the slice. This should be a // truncate instruction. SDNode *Inst; + // The original load instruction. LoadSDNode *Origin; + // The right shift amount in bits from the original load. unsigned Shift; + // The DAG from which Origin came from. // This is used to get some contextual information about legal types, etc. SelectionDAG *DAG; @@ -11746,7 +12115,8 @@ struct LoadedSlice { return true; } }; -} + +} // end anonymous namespace /// \brief Check that all bits set in \p UsedBits form a dense region, i.e., /// \p UsedBits looks like 0..0 1..1 0..0. 
@@ -11804,7 +12174,6 @@ static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices, for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice, // Set the beginning of the pair. First = Second) { - Second = &LoadedSlices[CurrSlice]; // If First is NULL, it means we start a new pair. @@ -11935,7 +12304,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) { // will be across several bytes. We do not support that. unsigned Width = User->getValueSizeInBits(0); if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7)) - return 0; + return false; // Build the slice for this chain of computations. LoadedSlice LS(User, LD, Shift, &DAG); @@ -12060,7 +12429,6 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { return Result; } - /// Check to see if IVal is something that provides a value as specified by /// MaskInfo. If so, replace the specified store with a narrower store of /// truncated IVal. @@ -12121,7 +12489,6 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, .getNode(); } - /// Look for sequence of load / op / store where op is one of 'or', 'xor', and /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try /// narrowing the load and store if it would end up being a win for performance @@ -12325,7 +12692,6 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, // Walk all the users of the constant with which we're multiplying. for (SDNode *Use : ConstNode->uses()) { - if (Use == MulNode) // This use is the one we're on right now. Skip it. 
continue; @@ -12376,6 +12742,12 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, return false; } +static SDValue peekThroughBitcast(SDValue V) { + while (V.getOpcode() == ISD::BITCAST) + V = V.getOperand(0); + return V; +} + SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores) { SmallVector<SDValue, 8> Chains; @@ -12403,56 +12775,93 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( if (NumStores < 2) return false; - int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8; - // The latest Node in the DAG. SDLoc DL(StoreNodes[0].MemNode); - SDValue StoredVal; + int64_t ElementSizeBits = MemVT.getStoreSizeInBits(); + unsigned SizeInBits = NumStores * ElementSizeBits; + unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1; + + EVT StoreTy; if (UseVector) { - bool IsVec = MemVT.isVector(); - unsigned Elts = NumStores; - if (IsVec) { - // When merging vector stores, get the total number of elements. - Elts *= MemVT.getVectorNumElements(); - } + unsigned Elts = NumStores * NumMemElts; // Get the type for the merged vector store. - EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); - assert(TLI.isTypeLegal(Ty) && "Illegal vector store"); + StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); + } else + StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits); + SDValue StoredVal; + if (UseVector) { if (IsConstantSrc) { SmallVector<SDValue, 8> BuildVector; - for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) { + for (unsigned I = 0; I != NumStores; ++I) { StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode); SDValue Val = St->getValue(); - if (MemVT.getScalarType().isInteger()) - if (auto *CFP = dyn_cast<ConstantFPSDNode>(St->getValue())) - Val = DAG.getConstant( - (uint32_t)CFP->getValueAPF().bitcastToAPInt().getZExtValue(), - SDLoc(CFP), MemVT); + // If constant is of the wrong type, convert it now. 
+ if (MemVT != Val.getValueType()) { + Val = peekThroughBitcast(Val); + // Deal with constants of wrong size. + if (ElementSizeBits != Val.getValueSizeInBits()) { + EVT IntMemVT = + EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()); + if (isa<ConstantFPSDNode>(Val)) { + // Not clear how to truncate FP values. + return false; + } else if (auto *C = dyn_cast<ConstantSDNode>(Val)) + Val = DAG.getConstant(C->getAPIntValue() + .zextOrTrunc(Val.getValueSizeInBits()) + .zextOrTrunc(ElementSizeBits), + SDLoc(C), IntMemVT); + } + // Make sure correctly size type is the correct type. + Val = DAG.getBitcast(MemVT, Val); + } BuildVector.push_back(Val); } - StoredVal = DAG.getBuildVector(Ty, DL, BuildVector); + StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS + : ISD::BUILD_VECTOR, + DL, StoreTy, BuildVector); } else { SmallVector<SDValue, 8> Ops; for (unsigned i = 0; i < NumStores; ++i) { StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); - SDValue Val = St->getValue(); - // All operands of BUILD_VECTOR / CONCAT_VECTOR must have the same type. - if (Val.getValueType() != MemVT) - return false; + SDValue Val = peekThroughBitcast(St->getValue()); + // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of + // type MemVT. If the underlying value is not the correct + // type, but it is an extraction of an appropriate vector we + // can recast Val to be of the correct type. This may require + // converting between EXTRACT_VECTOR_ELT and + // EXTRACT_SUBVECTOR. + if ((MemVT != Val.getValueType()) && + (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT || + Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) { + SDValue Vec = Val.getOperand(0); + EVT MemVTScalarTy = MemVT.getScalarType(); + // We may need to add a bitcast here to get types to line up. 
+ if (MemVTScalarTy != Vec.getValueType()) { + unsigned Elts = Vec.getValueType().getSizeInBits() / + MemVTScalarTy.getSizeInBits(); + EVT NewVecTy = + EVT::getVectorVT(*DAG.getContext(), MemVTScalarTy, Elts); + Vec = DAG.getBitcast(NewVecTy, Vec); + } + auto OpC = (MemVT.isVector()) ? ISD::EXTRACT_SUBVECTOR + : ISD::EXTRACT_VECTOR_ELT; + Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Val.getOperand(1)); + } Ops.push_back(Val); } // Build the extracted vector elements back into a vector. - StoredVal = DAG.getNode(IsVec ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, - DL, Ty, Ops); } + StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS + : ISD::BUILD_VECTOR, + DL, StoreTy, Ops); + } } else { // We should always use a vector store when merging extracted vector // elements, so this path implies a store of constants. assert(IsConstantSrc && "Merged vector elements should use vector store"); - unsigned SizeInBits = NumStores * ElementSizeBytes * 8; APInt StoreInt(SizeInBits, 0); // Construct a single integer constant which is made of the smaller @@ -12463,18 +12872,25 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode); SDValue Val = St->getValue(); - StoreInt <<= ElementSizeBytes * 8; + StoreInt <<= ElementSizeBits; if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) { - StoreInt |= C->getAPIntValue().zextOrTrunc(SizeInBits); + StoreInt |= C->getAPIntValue() + .zextOrTrunc(ElementSizeBits) + .zextOrTrunc(SizeInBits); } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) { - StoreInt |= C->getValueAPF().bitcastToAPInt().zextOrTrunc(SizeInBits); + StoreInt |= C->getValueAPF() + .bitcastToAPInt() + .zextOrTrunc(ElementSizeBits) + .zextOrTrunc(SizeInBits); + // If fp truncation is necessary give up for now. + if (MemVT.getSizeInBits() != ElementSizeBits) + return false; } else { llvm_unreachable("Invalid constant element type"); } } // Create the new Load and Store operations. 
- EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits); StoredVal = DAG.getConstant(StoreInt, DL, StoreTy); } @@ -12483,7 +12899,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( // make sure we use trunc store if it's necessary to be legal. SDValue NewStore; - if (UseVector || !UseTrunc) { + if (!UseTrunc) { NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), FirstInChain->getAlignment()); @@ -12517,6 +12933,7 @@ void DAGCombiner::getStoreMergeCandidates( BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG); EVT MemVT = St->getMemoryVT(); + SDValue Val = peekThroughBitcast(St->getValue()); // We must have a base and an offset. if (!BasePtr.getBase().getNode()) return; @@ -12525,47 +12942,62 @@ void DAGCombiner::getStoreMergeCandidates( if (BasePtr.getBase().isUndef()) return; - bool IsConstantSrc = isa<ConstantSDNode>(St->getValue()) || - isa<ConstantFPSDNode>(St->getValue()); - bool IsExtractVecSrc = - (St->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT || - St->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR); - bool IsLoadSrc = isa<LoadSDNode>(St->getValue()); + bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val); + bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT || + Val.getOpcode() == ISD::EXTRACT_SUBVECTOR); + bool IsLoadSrc = isa<LoadSDNode>(Val); BaseIndexOffset LBasePtr; // Match on loadbaseptr if relevant. - if (IsLoadSrc) - LBasePtr = BaseIndexOffset::match( - cast<LoadSDNode>(St->getValue())->getBasePtr(), DAG); - + EVT LoadVT; + if (IsLoadSrc) { + auto *Ld = cast<LoadSDNode>(Val); + LBasePtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG); + LoadVT = Ld->getMemoryVT(); + // Load and store should be the same type. 
+ if (MemVT != LoadVT) + return; + } auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr, int64_t &Offset) -> bool { if (Other->isVolatile() || Other->isIndexed()) return false; - // We can merge constant floats to equivalent integers - if (Other->getMemoryVT() != MemVT) - if (!(MemVT.isInteger() && MemVT.bitsEq(Other->getMemoryVT()) && - isa<ConstantFPSDNode>(Other->getValue()))) - return false; + SDValue Val = peekThroughBitcast(Other->getValue()); + // Allow merging constants of different types as integers. + bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT()) + : Other->getMemoryVT() != MemVT; if (IsLoadSrc) { + if (NoTypeMatch) + return false; // The Load's Base Ptr must also match - if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Other->getValue())) { + if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) { auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr(), DAG); + if (LoadVT != OtherLd->getMemoryVT()) + return false; if (!(LBasePtr.equalBaseIndex(LPtr, DAG))) return false; } else return false; } - if (IsConstantSrc) - if (!(isa<ConstantSDNode>(Other->getValue()) || - isa<ConstantFPSDNode>(Other->getValue()))) + if (IsConstantSrc) { + if (NoTypeMatch) return false; - if (IsExtractVecSrc) - if (!(Other->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT || - Other->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR)) + if (!(isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val))) + return false; + } + if (IsExtractVecSrc) { + // Do not merge truncated stores here. + if (Other->isTruncatingStore()) return false; + if (!MemVT.bitsEq(Val.getValueType())) + return false; + if (Val.getOpcode() != ISD::EXTRACT_VECTOR_ELT && + Val.getOpcode() != ISD::EXTRACT_SUBVECTOR) + return false; + } Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG); return (BasePtr.equalBaseIndex(Ptr, DAG, Offset)); }; + // We looking for a root node which is an ancestor to all mergable // stores. 
We search up through a load, to our root and then down // through all children. For instance we will find Store{1,2,3} if @@ -12612,10 +13044,8 @@ void DAGCombiner::getStoreMergeCandidates( // indirectly through its operand (we already consider dependencies // through the chain). Check in parallel by searching up from // non-chain operands of candidates. - bool DAGCombiner::checkMergeStoreCandidatesForDependencies( SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores) { - // FIXME: We should be able to truncate a full search of // predecessors by doing a BFS and keeping tabs the originating // stores from which worklist nodes come from in a similar way to @@ -12648,12 +13078,13 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { return false; EVT MemVT = St->getMemoryVT(); - int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8; + int64_t ElementSizeBytes = MemVT.getStoreSize(); + unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1; if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits) return false; - bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute( + bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute( Attribute::NoImplicitFloat); // This function cannot currently deal with non-byte-sized memory sizes. @@ -12665,7 +13096,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { // Perform an early exit check. Do not bother looking at stored values that // are not constants, loads, or extracted vector elements. - SDValue StoredVal = St->getValue(); + SDValue StoredVal = peekThroughBitcast(St->getValue()); bool IsLoadSrc = isa<LoadSDNode>(StoredVal); bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) || isa<ConstantFPSDNode>(StoredVal); @@ -12675,12 +13106,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc) return false; - // Don't merge vectors into wider vectors if the source data comes from loads. 
- // TODO: This restriction can be lifted by using logic similar to the - // ExtractVecSrc case. - if (MemVT.isVector() && IsLoadSrc) - return false; - SmallVector<MemOpLink, 8> StoreNodes; // Find potential store merge candidates by searching through chain sub-DAG getStoreMergeCandidates(St, StoreNodes); @@ -12759,19 +13184,20 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { unsigned LastLegalVectorType = 1; bool LastIntegerTrunc = false; bool NonZero = false; + unsigned FirstZeroAfterNonZero = NumConsecutiveStores; for (unsigned i = 0; i < NumConsecutiveStores; ++i) { StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode); SDValue StoredVal = ST->getValue(); - - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) { - NonZero |= !C->isNullValue(); - } else if (ConstantFPSDNode *C = - dyn_cast<ConstantFPSDNode>(StoredVal)) { - NonZero |= !C->getConstantFPValue()->isNullValue(); - } else { - // Non-constant. - break; + bool IsElementZero = false; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) + IsElementZero = C->isNullValue(); + else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) + IsElementZero = C->getConstantFPValue()->isNullValue(); + if (IsElementZero) { + if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores) + FirstZeroAfterNonZero = i; } + NonZero |= !IsElementZero; // Find a legal type for the constant store. 
unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; @@ -12791,8 +13217,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) && - TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, - FirstStoreAS, FirstStoreAlign, &IsFast) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, + FirstStoreAlign, &IsFast) && IsFast) { LastIntegerTrunc = true; LastLegalType = i + 1; @@ -12806,13 +13232,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) && !NoVectors) { // Find a legal type for the vector store. - unsigned Elts = i + 1; - if (MemVT.isVector()) { - // When merging vector stores, get the total number of elements. - Elts *= MemVT.getVectorNumElements(); - } + unsigned Elts = (i + 1) * NumMemElts; EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); - if (TLI.isTypeLegal(Ty) && + if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) && TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, FirstStoreAlign, &IsFast) && @@ -12821,23 +13243,34 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { } } + bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; + unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType; + // Check if we found a legal integer type that creates a meaningful merge. - if (LastLegalType < 2 && LastLegalVectorType < 2) { - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1); + if (NumElem < 2) { + // We know that candidate stores are in order and of correct + // shape. While there is no mergeable sequence from the + // beginning one may start later in the sequence. 
The only + // reason a merge of size N could have failed where another of + // the same size would not have, is if the alignment has + // improved or we've dropped a non-zero value. Drop as many + // candidates as we can here. + unsigned NumSkip = 1; + while ( + (NumSkip < NumConsecutiveStores) && + (NumSkip < FirstZeroAfterNonZero) && + (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) { + NumSkip++; + } + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); continue; } - bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; - unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType; - bool Merged = MergeStoresOfConstantsOrVecElts( StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc); - if (!Merged) { - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); - continue; - } + RV |= Merged; + // Remove merged stores for next iteration. - RV = true; StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); continue; } @@ -12849,25 +13282,20 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { unsigned FirstStoreAS = FirstInChain->getAddressSpace(); unsigned FirstStoreAlign = FirstInChain->getAlignment(); unsigned NumStoresToMerge = 1; - bool IsVec = MemVT.isVector(); for (unsigned i = 0; i < NumConsecutiveStores; ++i) { StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); - unsigned StoreValOpcode = St->getValue().getOpcode(); + SDValue StVal = peekThroughBitcast(St->getValue()); // This restriction could be loosened. // Bail out if any stored values are not elements extracted from a // vector. It should be possible to handle mixed sources, but load // sources need more careful handling (see the block of code below that // handles consecutive loads). 
- if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT && - StoreValOpcode != ISD::EXTRACT_SUBVECTOR) + if (StVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT && + StVal.getOpcode() != ISD::EXTRACT_SUBVECTOR) return RV; // Find a legal type for the vector store. - unsigned Elts = i + 1; - if (IsVec) { - // When merging vector stores, get the total number of elements. - Elts *= MemVT.getVectorNumElements(); - } + unsigned Elts = (i + 1) * NumMemElts; EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); bool IsFast; @@ -12879,6 +13307,23 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { NumStoresToMerge = i + 1; } + // Check if we found a legal integer type that creates a meaningful merge. + if (NumStoresToMerge < 2) { + // We know that candidate stores are in order and of correct + // shape. While there is no mergeable sequence from the + // beginning one may start later in the sequence. The only + // reason a merge of size N could have failed where another of + // the same size would not have, is if the alignment has + // improved. Drop as many candidates as we can here. 
+ unsigned NumSkip = 1; + while ((NumSkip < NumConsecutiveStores) && + (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) + NumSkip++; + + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); + continue; + } + bool Merged = MergeStoresOfConstantsOrVecElts( StoreNodes, MemVT, NumStoresToMerge, false, true, false); if (!Merged) { @@ -12905,7 +13350,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { BaseIndexOffset LdBasePtr; for (unsigned i = 0; i < NumConsecutiveStores; ++i) { StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); - LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue()); + SDValue Val = peekThroughBitcast(St->getValue()); + LoadSDNode *Ld = dyn_cast<LoadSDNode>(Val); if (!Ld) break; @@ -12917,10 +13363,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { if (Ld->isVolatile() || Ld->isIndexed()) break; - // We do not accept ext loads. - if (Ld->getExtensionType() != ISD::NON_EXTLOAD) - break; - // The stored memory type must be the same. if (Ld->getMemoryVT() != MemVT) break; @@ -12986,7 +13428,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { isDereferenceable = false; // Find a legal type for the vector store. 
- EVT StoreTy = EVT::getVectorVT(Context, MemVT, i + 1); + unsigned Elts = (i + 1) * NumMemElts; + EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); + bool IsFastSt, IsFastLd; if (TLI.isTypeLegal(StoreTy) && TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && @@ -13023,8 +13467,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) && TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) && - TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, - FirstStoreAS, FirstStoreAlign, &IsFastSt) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, + FirstStoreAlign, &IsFastSt) && IsFastSt && TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, FirstLoadAlign, &IsFastLd) && @@ -13047,7 +13491,19 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { NumElem = std::min(LastLegalType, NumElem); if (NumElem < 2) { - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1); + // We know that candidate stores are in order and of correct + // shape. While there is no mergeable sequence from the + // beginning one may start later in the sequence. The only + // reason a merge of size N could have failed where another of + // the same size would not have is if the alignment or either + // the load or store has improved. Drop as many candidates as we + // can here. + unsigned NumSkip = 1; + while ((NumSkip < LoadNodes.size()) && + (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) && + (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) + NumSkip++; + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); continue; } @@ -13055,7 +13511,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { // to memory. EVT JointMemOpVT; if (UseVectorTy) { - JointMemOpVT = EVT::getVectorVT(Context, MemVT, NumElem); + // Find a legal type for the vector store. 
+ unsigned Elts = NumElem * NumMemElts; + JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); } else { unsigned SizeInBits = NumElem * ElementSizeBytes * 8; JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits); @@ -13104,12 +13562,17 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { SDValue(NewLoad.getNode(), 1)); } - // Replace the all stores with the new store. - for (unsigned i = 0; i < NumElem; ++i) + // Replace the all stores with the new store. Recursively remove + // corresponding value if its no longer used. + for (unsigned i = 0; i < NumElem; ++i) { + SDValue Val = StoreNodes[i].MemNode->getOperand(1); CombineTo(StoreNodes[i].MemNode, NewStore); + if (Val.getNode()->use_empty()) + recursivelyDeleteUnusedNodes(Val.getNode()); + } + RV = true; StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); - continue; } return RV; } @@ -13284,7 +13747,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // See if we can simplify the input to this truncstore with knowledge that // only the low bits are being used. For example: // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8" - SDValue Shorter = GetDemandedBits( + SDValue Shorter = DAG.GetDemandedBits( Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(), ST->getMemoryVT().getScalarSizeInBits())); AddToWorklist(Value.getNode()); @@ -13356,11 +13819,11 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { Ptr, ST->getMemoryVT(), ST->getMemOperand()); } - // Only perform this optimization before the types are legal, because we - // don't want to perform this optimization on every DAGCombine invocation. - if ((TLI.mergeStoresAfterLegalization()) ? Level == AfterLegalizeDAG - : !LegalTypes) { - for (;;) { + // Always perform this optimization before types are legal. If the target + // prefers, also try this after legalization to catch stores that were created + // by intrinsics or other nodes. 
+ if (!LegalTypes || (TLI.mergeStoresAfterLegalization())) { + while (true) { // There can be multiple store sequences on the same chain. // Keep trying to merge store sequences until we are unable to do so // or until we merge the last store on the chain. @@ -13499,6 +13962,60 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { return St1; } +/// Convert a disguised subvector insertion into a shuffle: +/// insert_vector_elt V, (bitcast X from vector type), IdxC --> +/// bitcast(shuffle (bitcast V), (extended X), Mask) +/// Note: We do not use an insert_subvector node because that requires a legal +/// subvector type. +SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) { + SDValue InsertVal = N->getOperand(1); + if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() || + !InsertVal.getOperand(0).getValueType().isVector()) + return SDValue(); + + SDValue SubVec = InsertVal.getOperand(0); + SDValue DestVec = N->getOperand(0); + EVT SubVecVT = SubVec.getValueType(); + EVT VT = DestVec.getValueType(); + unsigned NumSrcElts = SubVecVT.getVectorNumElements(); + unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits(); + unsigned NumMaskVals = ExtendRatio * NumSrcElts; + + // Step 1: Create a shuffle mask that implements this insert operation. The + // vector that we are inserting into will be operand 0 of the shuffle, so + // those elements are just 'i'. The inserted subvector is in the first + // positions of operand 1 of the shuffle. Example: + // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7} + SmallVector<int, 16> Mask(NumMaskVals); + for (unsigned i = 0; i != NumMaskVals; ++i) { + if (i / NumSrcElts == InsIndex) + Mask[i] = (i % NumSrcElts) + NumMaskVals; + else + Mask[i] = i; + } + + // Bail out if the target can not handle the shuffle we want to create. 
+ EVT SubVecEltVT = SubVecVT.getVectorElementType(); + EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals); + if (!TLI.isShuffleMaskLegal(Mask, ShufVT)) + return SDValue(); + + // Step 2: Create a wide vector from the inserted source vector by appending + // undefined elements. This is the same size as our destination vector. + SDLoc DL(N); + SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT)); + ConcatOps[0] = SubVec; + SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps); + + // Step 3: Shuffle in the padded subvector. + SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec); + SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask); + AddToWorklist(PaddedSubV.getNode()); + AddToWorklist(DestVecBC.getNode()); + AddToWorklist(Shuf.getNode()); + return DAG.getBitcast(VT, Shuf); +} + SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { SDValue InVec = N->getOperand(0); SDValue InVal = N->getOperand(1); @@ -13511,10 +14028,20 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { EVT VT = InVec.getValueType(); - // Check that we know which element is being inserted - if (!isa<ConstantSDNode>(EltNo)) + // Remove redundant insertions: + // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x + if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1)) + return InVec; + + // We must know which element is being inserted for folds below here. + auto *IndexC = dyn_cast<ConstantSDNode>(EltNo); + if (!IndexC) return SDValue(); - unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + unsigned Elt = IndexC->getZExtValue(); + + if (SDValue Shuf = combineInsertEltToShuffle(N, Elt)) + return Shuf; // Canonicalize insert_vector_elt dag nodes. // Example: @@ -13692,9 +14219,11 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // converts. 
} - // extract_vector_elt (v2i32 (bitcast i64:x)), 0 -> i32 (trunc i64:x) + // extract_vector_elt (v2i32 (bitcast i64:x)), EltTrunc -> i32 (trunc i64:x) + bool isLE = DAG.getDataLayout().isLittleEndian(); + unsigned EltTrunc = isLE ? 0 : VT.getVectorNumElements() - 1; if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() && - ConstEltNo->isNullValue() && VT.isInteger()) { + ConstEltNo->getZExtValue() == EltTrunc && VT.isInteger()) { SDValue BCSrc = InVec.getOperand(0); if (BCSrc.getValueType().isScalarInteger()) return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc); @@ -13748,7 +14277,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // FIXME: We should handle recursing on other vector shuffles and // scalar_to_vector here as well. - if (!LegalOperations) { + if (!LegalOperations || + // FIXME: Should really be just isOperationLegalOrCustom. + TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VT) || + TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VT)) { EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout()); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec, DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy)); @@ -14054,10 +14586,18 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N, EVT InVT1 = VecIn1.getValueType(); EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1; - unsigned Vec2Offset = InVT1.getVectorNumElements(); + unsigned Vec2Offset = 0; unsigned NumElems = VT.getVectorNumElements(); unsigned ShuffleNumElems = NumElems; + // In case both the input vectors are extracted from same base + // vector we do not need extra addend (Vec2Offset) while + // computing shuffle mask. + if (!VecIn2 || !(VecIn1.getOpcode() == ISD::EXTRACT_SUBVECTOR) || + !(VecIn2.getOpcode() == ISD::EXTRACT_SUBVECTOR) || + !(VecIn1.getOperand(0) == VecIn2.getOperand(0))) + Vec2Offset = InVT1.getVectorNumElements(); + // We can't generate a shuffle node with mismatched input and output types. 
// Try to make the types match the type of the output. if (InVT1 != VT || InVT2 != VT) { @@ -14072,7 +14612,7 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N, VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps); VecIn2 = SDValue(); } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) { - if (!TLI.isExtractSubvectorCheap(VT, NumElems)) + if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems)) return SDValue(); if (!VecIn2.getNode()) { @@ -14204,7 +14744,6 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) { if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT || !isa<ConstantSDNode>(Op.getOperand(1))) return SDValue(); - SDValue ExtractedFromVec = Op.getOperand(0); // All inputs must have the same element type as the output. @@ -14227,6 +14766,50 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) { if (VecIn.size() < 2) return SDValue(); + // If all the Operands of BUILD_VECTOR extract from same + // vector, then split the vector efficiently based on the maximum + // vector access index and adjust the VectorMask and + // VecIn accordingly. 
+ if (VecIn.size() == 2) { + unsigned MaxIndex = 0; + unsigned NearestPow2 = 0; + SDValue Vec = VecIn.back(); + EVT InVT = Vec.getValueType(); + MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); + SmallVector<unsigned, 8> IndexVec(NumElems, 0); + + for (unsigned i = 0; i < NumElems; i++) { + if (VectorMask[i] <= 0) + continue; + unsigned Index = N->getOperand(i).getConstantOperandVal(1); + IndexVec[i] = Index; + MaxIndex = std::max(MaxIndex, Index); + } + + NearestPow2 = PowerOf2Ceil(MaxIndex); + if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 && + NumElems * 2 < NearestPow2) { + unsigned SplitSize = NearestPow2 / 2; + EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), + InVT.getVectorElementType(), SplitSize); + if (TLI.isTypeLegal(SplitVT)) { + SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec, + DAG.getConstant(SplitSize, DL, IdxTy)); + SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec, + DAG.getConstant(0, DL, IdxTy)); + VecIn.pop_back(); + VecIn.push_back(VecIn1); + VecIn.push_back(VecIn2); + + for (unsigned i = 0; i < NumElems; i++) { + if (VectorMask[i] <= 0) + continue; + VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2; + } + } + } + } + // TODO: We want to sort the vectors by descending length, so that adjacent // pairs have similar length, and the longer vector is always first in the // pair. @@ -14315,77 +14898,9 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) { DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask); } } - return Shuffles[0]; } -// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT -// operations which can be matched to a truncate. -SDValue DAGCombiner::reduceBuildVecToTrunc(SDNode *N) { - // TODO: Add support for big-endian. 
- if (DAG.getDataLayout().isBigEndian()) - return SDValue(); - if (N->getNumOperands() < 2) - return SDValue(); - SDLoc DL(N); - EVT VT = N->getValueType(0); - unsigned NumElems = N->getNumOperands(); - - if (!isTypeLegal(VT)) - return SDValue(); - - // If the input is something other than an EXTRACT_VECTOR_ELT with a constant - // index, bail out. - // TODO: Allow undef elements in some cases? - if (any_of(N->ops(), [VT](SDValue Op) { - return Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT || - !isa<ConstantSDNode>(Op.getOperand(1)) || - Op.getValueType() != VT.getVectorElementType(); - })) - return SDValue(); - - // Helper for obtaining an EXTRACT_VECTOR_ELT's constant index - auto GetExtractIdx = [](SDValue Extract) { - return cast<ConstantSDNode>(Extract.getOperand(1))->getSExtValue(); - }; - - // The first BUILD_VECTOR operand must be an an extract from index zero - // (assuming no undef and little-endian). - if (GetExtractIdx(N->getOperand(0)) != 0) - return SDValue(); - - // Compute the stride from the first index. - int Stride = GetExtractIdx(N->getOperand(1)); - SDValue ExtractedFromVec = N->getOperand(0).getOperand(0); - - // Proceed only if the stride and the types can be matched to a truncate. - if ((Stride == 1 || !isPowerOf2_32(Stride)) || - (ExtractedFromVec.getValueType().getVectorNumElements() != - Stride * NumElems) || - (VT.getScalarSizeInBits() * Stride > 64)) - return SDValue(); - - // Check remaining operands are consistent with the computed stride. - for (unsigned i = 1; i != NumElems; ++i) { - SDValue Op = N->getOperand(i); - - if ((Op.getOperand(0) != ExtractedFromVec) || - (GetExtractIdx(Op) != Stride * i)) - return SDValue(); - } - - // All checks were ok, construct the truncate. - LLVMContext &Ctx = *DAG.getContext(); - EVT NewVT = VT.getVectorVT( - Ctx, EVT::getIntegerVT(Ctx, VT.getScalarSizeInBits() * Stride), NumElems); - EVT TruncVT = - VT.isFloatingPoint() ? 
VT.changeVectorElementTypeToInteger() : VT; - - SDValue Res = DAG.getBitcast(NewVT, ExtractedFromVec); - Res = DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, Res); - return DAG.getBitcast(VT, Res); -} - SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { EVT VT = N->getValueType(0); @@ -14428,10 +14943,6 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N)) return V; - if (TLI.isDesirableToCombineBuildVectorToTruncate()) - if (SDValue V = reduceBuildVecToTrunc(N)) - return V; - if (SDValue V = reduceBuildVecToShuffle(N)) return V; @@ -14514,8 +15025,7 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { for (SDValue Op : N->ops()) { // Peek through any bitcast. - while (Op.getOpcode() == ISD::BITCAST) - Op = Op.getOperand(0); + Op = peekThroughBitcast(Op); // UNDEF nodes convert to UNDEF shuffle mask values. if (Op.isUndef()) { @@ -14534,8 +15044,7 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { EVT ExtVT = ExtVec.getValueType(); // Peek through any bitcast. - while (ExtVec.getOpcode() == ISD::BITCAST) - ExtVec = ExtVec.getOperand(0); + ExtVec = peekThroughBitcast(ExtVec); // UNDEF nodes convert to UNDEF shuffle mask values. if (ExtVec.isUndef()) { @@ -14760,9 +15269,7 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) { // We are looking for an optionally bitcasted wide vector binary operator // feeding an extract subvector. - SDValue BinOp = Extract->getOperand(0); - if (BinOp.getOpcode() == ISD::BITCAST) - BinOp = BinOp.getOperand(0); + SDValue BinOp = peekThroughBitcast(Extract->getOperand(0)); // TODO: The motivating case for this transform is an x86 AVX1 target. 
That // target has temptingly almost legal versions of bitwise logic ops in 256-bit @@ -14786,13 +15293,8 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) { return SDValue(); // Peek through bitcasts of the binary operator operands if needed. - SDValue LHS = BinOp.getOperand(0); - if (LHS.getOpcode() == ISD::BITCAST) - LHS = LHS.getOperand(0); - - SDValue RHS = BinOp.getOperand(1); - if (RHS.getOpcode() == ISD::BITCAST) - RHS = RHS.getOperand(0); + SDValue LHS = peekThroughBitcast(BinOp.getOperand(0)); + SDValue RHS = peekThroughBitcast(BinOp.getOperand(1)); // We need at least one concatenation operation of a binop operand to make // this transform worthwhile. The concat must double the input vector sizes. @@ -14891,8 +15393,34 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { } // Skip bitcasting - if (V->getOpcode() == ISD::BITCAST) - V = V.getOperand(0); + V = peekThroughBitcast(V); + + // If the input is a build vector. Try to make a smaller build vector. + if (V->getOpcode() == ISD::BUILD_VECTOR) { + if (auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))) { + EVT InVT = V->getValueType(0); + unsigned ExtractSize = NVT.getSizeInBits(); + unsigned EltSize = InVT.getScalarSizeInBits(); + // Only do this if we won't split any elements. + if (ExtractSize % EltSize == 0) { + unsigned NumElems = ExtractSize / EltSize; + EVT ExtractVT = EVT::getVectorVT(*DAG.getContext(), + InVT.getVectorElementType(), NumElems); + if ((!LegalOperations || + TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT)) && + (!LegalTypes || TLI.isTypeLegal(ExtractVT))) { + unsigned IdxVal = (Idx->getZExtValue() * NVT.getScalarSizeInBits()) / + EltSize; + + // Extract the pieces from the original build_vector. 
+ SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N), + makeArrayRef(V->op_begin() + IdxVal, + NumElems)); + return DAG.getBitcast(NVT, BuildVec); + } + } + } + } if (V->getOpcode() == ISD::INSERT_SUBVECTOR) { // Handle only simple case where vector being inserted and vector @@ -15013,6 +15541,37 @@ static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0, return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask()); } +static SDValue simplifyShuffleMask(ShuffleVectorSDNode *SVN, SDValue N0, + SDValue N1, SelectionDAG &DAG) { + auto isUndefElt = [](SDValue V, int Idx) { + // TODO - handle more cases as required. + if (V.getOpcode() == ISD::BUILD_VECTOR) + return V.getOperand(Idx).isUndef(); + if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) + return (Idx != 0) || V.getOperand(0).isUndef(); + return false; + }; + + EVT VT = SVN->getValueType(0); + unsigned NumElts = VT.getVectorNumElements(); + + bool Changed = false; + SmallVector<int, 8> NewMask; + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + if ((0 <= Idx && Idx < (int)NumElts && isUndefElt(N0, Idx)) || + ((int)NumElts < Idx && isUndefElt(N1, Idx - NumElts))) { + Changed = true; + Idx = -1; + } + NewMask.push_back(Idx); + } + if (Changed) + return DAG.getVectorShuffle(VT, SDLoc(SVN), N0, N1, NewMask); + + return SDValue(); +} + // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat, // or turn a shuffle of a single concat into simpler shuffle then concat. static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { @@ -15091,7 +15650,7 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { // // To deal with this, we currently use a bunch of mostly arbitrary heuristics. 
// We don't fold shuffles where one side is a non-zero constant, and we don't -// fold shuffles if the resulting BUILD_VECTOR would have duplicate +// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate // non-constant operands. This seems to work out reasonably well in practice. static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, @@ -15103,6 +15662,7 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, if (!N0->hasOneUse() || !N1->hasOneUse()) return SDValue(); + // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as // discussed above. if (!N1.isUndef()) { @@ -15114,6 +15674,15 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, return SDValue(); } + // If both inputs are splats of the same value then we can safely merge this + // to a single BUILD_VECTOR with undef elements based on the shuffle mask. + bool IsSplat = false; + auto *BV0 = dyn_cast<BuildVectorSDNode>(N0); + auto *BV1 = dyn_cast<BuildVectorSDNode>(N1); + if (BV0 && BV1) + if (SDValue Splat0 = BV0->getSplatValue()) + IsSplat = (Splat0 == BV1->getSplatValue()); + SmallVector<SDValue, 8> Ops; SmallSet<SDValue, 16> DuplicateOps; for (int M : SVN->getMask()) { @@ -15124,23 +15693,25 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, if (S.getOpcode() == ISD::BUILD_VECTOR) { Op = S.getOperand(Idx); } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) { - if (Idx == 0) - Op = S.getOperand(0); + assert(Idx == 0 && "Unexpected SCALAR_TO_VECTOR operand index."); + Op = S.getOperand(0); } else { // Operand can't be combined - bail out. return SDValue(); } } - // Don't duplicate a non-constant BUILD_VECTOR operand; semantically, this is - // fine, but it's likely to generate low-quality code if the target can't - // reconstruct an appropriate shuffle. 
+ // Don't duplicate a non-constant BUILD_VECTOR operand unless we're + // generating a splat; semantically, this is fine, but it's likely to + // generate low-quality code if the target can't reconstruct an appropriate + // shuffle. if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op)) - if (!DuplicateOps.insert(Op).second) + if (!IsSplat && !DuplicateOps.insert(Op).second) return SDValue(); Ops.push_back(Op); } + // BUILD_VECTOR requires all inputs to be of the same type, find the // maximum type and extend them all. EVT SVT = VT.getScalarType(); @@ -15162,7 +15733,8 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, - bool LegalOperations) { + bool LegalOperations, + bool LegalTypes) { EVT VT = SVN->getValueType(0); bool IsBigEndian = DAG.getDataLayout().isBigEndian(); @@ -15190,14 +15762,18 @@ static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN, // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for // power-of-2 extensions as they are the most likely. 
for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) { + // Check for non power of 2 vector sizes + if (NumElts % Scale != 0) + continue; if (!isAnyExtend(Scale)) continue; EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale); EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale); - if (!LegalOperations || - TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT)) - return DAG.getBitcast(VT, + if (!LegalTypes || TLI.isTypeLegal(OutVT)) + if (!LegalOperations || + TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT)) + return DAG.getBitcast(VT, DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT)); } @@ -15218,9 +15794,7 @@ static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, if (!VT.isInteger() || IsBigEndian) return SDValue(); - SDValue N0 = SVN->getOperand(0); - while (N0.getOpcode() == ISD::BITCAST) - N0 = N0.getOperand(0); + SDValue N0 = peekThroughBitcast(SVN->getOperand(0)); unsigned Opcode = N0.getOpcode(); if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG && @@ -15316,6 +15890,84 @@ static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask, NewMask); } +/// If the shuffle mask is taking exactly one element from the first vector +/// operand and passing through all other elements from the second vector +/// operand, return the index of the mask element that is choosing an element +/// from the first operand. Otherwise, return -1. +static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) { + int MaskSize = Mask.size(); + int EltFromOp0 = -1; + // TODO: This does not match if there are undef elements in the shuffle mask. + // Should we ignore undefs in the shuffle mask instead? The trade-off is + // removing an instruction (a shuffle), but losing the knowledge that some + // vector lanes are not needed. + for (int i = 0; i != MaskSize; ++i) { + if (Mask[i] >= 0 && Mask[i] < MaskSize) { + // We're looking for a shuffle of exactly one element from operand 0. 
+ if (EltFromOp0 != -1) + return -1; + EltFromOp0 = i; + } else if (Mask[i] != i + MaskSize) { + // Nothing from operand 1 can change lanes. + return -1; + } + } + return EltFromOp0; +} + +/// If a shuffle inserts exactly one element from a source vector operand into +/// another vector operand and we can access the specified element as a scalar, +/// then we can eliminate the shuffle. +static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf, + SelectionDAG &DAG) { + // First, check if we are taking one element of a vector and shuffling that + // element into another vector. + ArrayRef<int> Mask = Shuf->getMask(); + SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end()); + SDValue Op0 = Shuf->getOperand(0); + SDValue Op1 = Shuf->getOperand(1); + int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask); + if (ShufOp0Index == -1) { + // Commute mask and check again. + ShuffleVectorSDNode::commuteMask(CommutedMask); + ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask); + if (ShufOp0Index == -1) + return SDValue(); + // Commute operands to match the commuted shuffle mask. + std::swap(Op0, Op1); + Mask = CommutedMask; + } + + // The shuffle inserts exactly one element from operand 0 into operand 1. + // Now see if we can access that element as a scalar via a real insert element + // instruction. + // TODO: We can try harder to locate the element as a scalar. Examples: it + // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant. 
+ assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() && + "Shuffle mask value must be from operand 0"); + if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT) + return SDValue(); + + auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2)); + if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index]) + return SDValue(); + + // There's an existing insertelement with constant insertion index, so we + // don't need to check the legality/profitability of a replacement operation + // that differs at most in the constant value. The target should be able to + // lower any of those in a similar way. If not, legalization will expand this + // to a scalar-to-vector plus shuffle. + // + // Note that the shuffle may move the scalar from the position that the insert + // element used. Therefore, our new insert element occurs at the shuffle's + // mask index value, not the insert's index value. + // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C' + SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf), + Op0.getOperand(2).getValueType()); + return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(), + Op1, Op0.getOperand(1), NewInsIndex); +} + SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { EVT VT = N->getValueType(0); unsigned NumElts = VT.getVectorNumElements(); @@ -15362,6 +16014,13 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask); } + // Simplify shuffle mask if a referenced element is UNDEF. + if (SDValue V = simplifyShuffleMask(SVN, N0, N1, DAG)) + return V; + + if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG)) + return InsElt; + // A shuffle of a single vector that is a splat can always be folded. 
if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0)) if (N1->isUndef() && N0Shuf->isSplat()) @@ -15426,7 +16085,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return S; // Match shuffles that can be converted to any_vector_extend_in_reg. - if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations)) + if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations, LegalTypes)) return V; // Combine "truncate_vector_in_reg" style shuffles. @@ -15486,7 +16145,6 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (TLI.isTypeLegal(ScaleVT) && 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) && 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) { - int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits(); int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits(); @@ -15661,23 +16319,46 @@ SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) { EVT VT = N->getValueType(0); // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern - // with a VECTOR_SHUFFLE. + // with a VECTOR_SHUFFLE and possible truncate. if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { SDValue InVec = InVal->getOperand(0); SDValue EltNo = InVal->getOperand(1); - - // FIXME: We could support implicit truncation if the shuffle can be - // scaled to a smaller vector scalar type. - ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo); - if (C0 && VT == InVec.getValueType() && - VT.getScalarType() == InVal.getValueType()) { - SmallVector<int, 8> NewMask(VT.getVectorNumElements(), -1); + auto InVecT = InVec.getValueType(); + if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) { + SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1); int Elt = C0->getZExtValue(); NewMask[0] = Elt; - - if (TLI.isShuffleMaskLegal(NewMask, VT)) - return DAG.getVectorShuffle(VT, SDLoc(N), InVec, DAG.getUNDEF(VT), - NewMask); + SDValue Val; + // If we have an implict truncate do truncate here as long as it's legal. 
+ // if it's not legal, this should + if (VT.getScalarType() != InVal.getValueType() && + InVal.getValueType().isScalarInteger() && + isTypeLegal(VT.getScalarType())) { + Val = + DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal); + return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val); + } + if (VT.getScalarType() == InVecT.getScalarType() && + VT.getVectorNumElements() <= InVecT.getVectorNumElements() && + TLI.isShuffleMaskLegal(NewMask, VT)) { + Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec, + DAG.getUNDEF(InVecT), NewMask); + // If the initial vector is the correct size this shuffle is a + // valid result. + if (VT == InVecT) + return Val; + // If not we must truncate the vector. + if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) { + MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); + SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy); + EVT SubVT = + EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(), + VT.getVectorNumElements()); + Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val, + ZeroIdx); + return Val; + } + } } } @@ -15694,12 +16375,47 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { if (N1.isUndef()) return N0; + // For nested INSERT_SUBVECTORs, attempt to combine inner node first to allow + // us to pull BITCASTs from input to output. + if (N0.hasOneUse() && N0->getOpcode() == ISD::INSERT_SUBVECTOR) + if (SDValue NN0 = visitINSERT_SUBVECTOR(N0.getNode())) + return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, NN0, N1, N2); + // If this is an insert of an extracted vector into an undef vector, we can // just use the input to the extract. if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT) return N1.getOperand(0); + // If we are inserting a bitcast value into an undef, with the same + // number of elements, just use the bitcast input of the extract. + // i.e. 
INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 -> + // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2) + if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST && + N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR && + N1.getOperand(0).getOperand(1) == N2 && + N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() == + VT.getVectorNumElements()) { + return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0)); + } + + // If both N1 and N2 are bitcast values on which insert_subvector + // would makes sense, pull the bitcast through. + // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 -> + // BITCAST (INSERT_SUBVECTOR N0 N1 N2) + if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) { + SDValue CN0 = N0.getOperand(0); + SDValue CN1 = N1.getOperand(0); + if (CN0.getValueType().getVectorElementType() == + CN1.getValueType().getVectorElementType() && + CN0.getValueType().getVectorNumElements() == + VT.getVectorNumElements()) { + SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), + CN0.getValueType(), CN0, CN1, N2); + return DAG.getBitcast(VT, NewINSERT); + } + } + // Combine INSERT_SUBVECTORs where we are inserting to the same index. 
// INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx ) // --> INSERT_SUBVECTOR( Vec, SubNew, Idx ) @@ -15779,7 +16495,7 @@ SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) { SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { EVT VT = N->getValueType(0); SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); + SDValue RHS = peekThroughBitcast(N->getOperand(1)); SDLoc DL(N); // Make sure we're not running after operation legalization where it @@ -15790,9 +16506,6 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { if (N->getOpcode() != ISD::AND) return SDValue(); - if (RHS.getOpcode() == ISD::BITCAST) - RHS = RHS.getOperand(0); - if (RHS.getOpcode() != ISD::BUILD_VECTOR) return SDValue(); @@ -15945,7 +16658,6 @@ SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, /// the DAG combiner loop to avoid it being looked at. bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, SDValue RHS) { - // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x)) // The select + setcc is redundant, because fsqrt returns NaN for X < 0. if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) { @@ -16418,7 +17130,7 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, SDValue DAGCombiner::BuildSDIV(SDNode *N) { // when optimising for minimum size, we don't want to expand a div to a mul // and a shift. 
- if (DAG.getMachineFunction().getFunction()->optForMinSize()) + if (DAG.getMachineFunction().getFunction().optForMinSize()) return SDValue(); ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); @@ -16429,7 +17141,7 @@ SDValue DAGCombiner::BuildSDIV(SDNode *N) { if (C->isNullValue()) return SDValue(); - std::vector<SDNode*> Built; + std::vector<SDNode *> Built; SDValue S = TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built); @@ -16464,7 +17176,7 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) { SDValue DAGCombiner::BuildUDIV(SDNode *N) { // when optimising for minimum size, we don't want to expand a div to a mul // and a shift. - if (DAG.getMachineFunction().getFunction()->optForMinSize()) + if (DAG.getMachineFunction().getFunction().optForMinSize()) return SDValue(); ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); @@ -16475,7 +17187,7 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { if (C->isNullValue()) return SDValue(); - std::vector<SDNode*> Built; + std::vector<SDNode *> Built; SDValue S = TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built); @@ -16760,8 +17472,8 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { if (Op1->isInvariant() && Op0->writeMem()) return false; - unsigned NumBytes0 = Op0->getMemoryVT().getSizeInBits() >> 3; - unsigned NumBytes1 = Op1->getMemoryVT().getSizeInBits() >> 3; + unsigned NumBytes0 = Op0->getMemoryVT().getStoreSize(); + unsigned NumBytes1 = Op1->getMemoryVT().getStoreSize(); // Check for BaseIndexOffset matching. BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr(), DAG); @@ -16957,7 +17669,11 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain /// (aliasing node.) SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { - SmallVector<SDValue, 8> Aliases; // Ops for replacing token factor. 
+ if (OptLevel == CodeGenOpt::None) + return OldChain; + + // Ops for replacing token factor. + SmallVector<SDValue, 8> Aliases; // Accumulate all the aliases to this node. GatherAllAliases(N, OldChain, Aliases); @@ -16987,6 +17703,9 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { // to go from a partially-merged state to the desired final // fully-merged state. bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { + if (OptLevel == CodeGenOpt::None) + return false; + // This holds the base pointer, index, and the offset in bytes from the base // pointer. BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG); diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index b2599b2e17f1..d3c94b5f9e6b 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -63,6 +63,9 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" @@ -98,11 +101,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -168,8 +168,7 @@ bool FastISel::hasTrivialKill(const Value *V) { // No-op casts are trivially coalesced by fast-isel. 
if (const auto *Cast = dyn_cast<CastInst>(I)) - if (Cast->isNoopCast(DL.getIntPtrType(Cast->getContext())) && - !hasTrivialKill(Cast->getOperand(0))) + if (Cast->isNoopCast(DL) && !hasTrivialKill(Cast->getOperand(0))) return false; // Even the value might have only one use in the LLVM IR, it is possible that @@ -1133,6 +1132,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { case Intrinsic::lifetime_end: // The donothing intrinsic does, well, nothing. case Intrinsic::donothing: + // Neither does the sideeffect intrinsic. + case Intrinsic::sideeffect: // Neither does the assume intrinsic; it's also OK not to codegen its operand. case Intrinsic::assume: return true; @@ -1187,7 +1188,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { // into an indirect DBG_VALUE. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true, - Op->getReg(), 0, DI->getVariable(), DI->getExpression()); + Op->getReg(), DI->getVariable(), DI->getExpression()); } else BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::DBG_VALUE)) @@ -1212,35 +1213,32 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { if (!V) { // Currently the optimizer can produce this; insert an undef to // help debugging. Probably the optimizer should not do this. 
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(0U) - .addImm(DI->getOffset()) - .addMetadata(DI->getVariable()) - .addMetadata(DI->getExpression()); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, false, 0U, + DI->getVariable(), DI->getExpression()); } else if (const auto *CI = dyn_cast<ConstantInt>(V)) { if (CI->getBitWidth() > 64) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addCImm(CI) - .addImm(DI->getOffset()) + .addImm(0U) .addMetadata(DI->getVariable()) .addMetadata(DI->getExpression()); else BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addImm(CI->getZExtValue()) - .addImm(DI->getOffset()) + .addImm(0U) .addMetadata(DI->getVariable()) .addMetadata(DI->getExpression()); } else if (const auto *CF = dyn_cast<ConstantFP>(V)) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addFPImm(CF) - .addImm(DI->getOffset()) + .addImm(0U) .addMetadata(DI->getVariable()) .addMetadata(DI->getExpression()); } else if (unsigned Reg = lookUpRegForValue(V)) { // FIXME: This does not handle register-indirect values at offset 0. - bool IsIndirect = DI->getOffset() != 0; + bool IsIndirect = false; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect, Reg, - DI->getOffset(), DI->getVariable(), DI->getExpression()); + DI->getVariable(), DI->getExpression()); } else { // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. 
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index b736037d71dd..c7cdb49203b1 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -17,11 +17,14 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" @@ -32,12 +35,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> using namespace llvm; diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index b96c96f0b4df..cc9b41b4b487 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -21,14 +21,14 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/Support/Debug.h" #include 
"llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "instr-emitter" @@ -673,7 +673,6 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, MachineInstr * InstrEmitter::EmitDbgValue(SDDbgValue *SD, DenseMap<SDValue, unsigned> &VRBaseMap) { - uint64_t Offset = SD->getOffset(); MDNode *Var = SD->getVariable(); MDNode *Expr = SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); @@ -685,7 +684,7 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, // EmitTargetCodeForFrameDebugValue is responsible for allocation. return BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE)) .addFrameIndex(SD->getFrameIx()) - .addImm(Offset) + .addImm(0) .addMetadata(Var) .addMetadata(Expr); } @@ -727,11 +726,9 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, // Indirect addressing is indicated by an Imm as the second parameter. if (SD->isIndirect()) - MIB.addImm(Offset); - else { - assert(Offset == 0 && "direct value cannot have an offset"); + MIB.addImm(0U); + else MIB.addReg(0U, RegState::Debug); - } MIB.addMetadata(Var); MIB.addMetadata(Expr); @@ -938,10 +935,14 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap); break; } - case ISD::EH_LABEL: { - MCSymbol *S = cast<EHLabelSDNode>(Node)->getLabel(); + case ISD::EH_LABEL: + case ISD::ANNOTATION_LABEL: { + unsigned Opc = (Node->getOpcode() == ISD::EH_LABEL) + ? 
TargetOpcode::EH_LABEL + : TargetOpcode::ANNOTATION_LABEL; + MCSymbol *S = cast<LabelSDNode>(Node)->getLabel(); BuildMI(*MBB, InsertPos, Node->getDebugLoc(), - TII->get(TargetOpcode::EH_LABEL)).addSym(S); + TII->get(Opc)).addSym(S); break; } diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 7e4bc3ccb5d3..bb1dc17b7a1b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1,4 +1,4 @@ -//===-- LegalizeDAG.cpp - Implement SelectionDAG::Legalize ----------------===// +//===- LegalizeDAG.cpp - Implement SelectionDAG::Legalize -----------------===// // // The LLVM Compiler Infrastructure // @@ -11,37 +11,65 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Triple.h" +#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" -#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" 
#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Target/TargetOptions.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <tuple> +#include <utility> + using namespace llvm; #define DEBUG_TYPE "legalizedag" namespace { -struct FloatSignAsInt; +/// Keeps track of state when getting the sign of a floating-point value as an +/// integer. +struct FloatSignAsInt { + EVT FloatVT; + SDValue Chain; + SDValue FloatPtr; + SDValue IntPtr; + MachinePointerInfo IntPointerInfo; + MachinePointerInfo FloatPointerInfo; + SDValue IntValue; + APInt SignMask; + uint8_t SignBit; +}; //===----------------------------------------------------------------------===// /// This takes an arbitrary SelectionDAG as input and @@ -54,7 +82,6 @@ struct FloatSignAsInt; /// as part of its processing. For example, if a target does not support a /// 'setcc' instruction efficiently, but does support 'brcc' instruction, this /// will attempt merge setcc and brc instructions into brcc's. -/// class SelectionDAGLegalize { const TargetMachine &TM; const TargetLowering &TLI; @@ -165,11 +192,13 @@ private: public: // Node replacement helpers + void ReplacedNode(SDNode *N) { LegalizedNodes.erase(N); if (UpdatedNodes) UpdatedNodes->insert(N); } + void ReplaceNode(SDNode *Old, SDNode *New) { DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG); dbgs() << " with: "; New->dump(&DAG)); @@ -182,6 +211,7 @@ public: UpdatedNodes->insert(New); ReplacedNode(Old); } + void ReplaceNode(SDValue Old, SDValue New) { DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG); dbgs() << " with: "; New->dump(&DAG)); @@ -191,6 +221,7 @@ public: UpdatedNodes->insert(New.getNode()); ReplacedNode(Old.getNode()); } + void ReplaceNode(SDNode *Old, const SDValue *New) { DEBUG(dbgs() << " ... 
replacing: "; Old->dump(&DAG)); @@ -205,7 +236,8 @@ public: ReplacedNode(Old); } }; -} + +} // end anonymous namespace /// Return a vector shuffle operation which /// performs the same shuffe in terms of order or result bytes, but on a type @@ -376,6 +408,7 @@ SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, } SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { + DEBUG(dbgs() << "Optimizing float store operations\n"); // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' // FIXME: We shouldn't do this for TargetConstantFP's. // FIXME: move this to the DAG Combiner! Note that we can't regress due @@ -434,172 +467,184 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { } void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { - StoreSDNode *ST = cast<StoreSDNode>(Node); - SDValue Chain = ST->getChain(); - SDValue Ptr = ST->getBasePtr(); - SDLoc dl(Node); - - unsigned Alignment = ST->getAlignment(); - MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); - AAMDNodes AAInfo = ST->getAAInfo(); - - if (!ST->isTruncatingStore()) { - if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { - ReplaceNode(ST, OptStore); - return; - } + StoreSDNode *ST = cast<StoreSDNode>(Node); + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); + SDLoc dl(Node); - { - SDValue Value = ST->getValue(); - MVT VT = Value.getSimpleValueType(); - switch (TLI.getOperationAction(ISD::STORE, VT)) { - default: llvm_unreachable("This action is not supported yet!"); - case TargetLowering::Legal: { - // If this is an unaligned store and the target doesn't support it, - // expand it. 
- EVT MemVT = ST->getMemoryVT(); - unsigned AS = ST->getAddressSpace(); - unsigned Align = ST->getAlignment(); - const DataLayout &DL = DAG.getDataLayout(); - if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { - SDValue Result = TLI.expandUnalignedStore(ST, DAG); - ReplaceNode(SDValue(ST, 0), Result); - } - break; - } - case TargetLowering::Custom: { - SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Res && Res != SDValue(Node, 0)) - ReplaceNode(SDValue(Node, 0), Res); - return; - } - case TargetLowering::Promote: { - MVT NVT = TLI.getTypeToPromoteTo(ISD::STORE, VT); - assert(NVT.getSizeInBits() == VT.getSizeInBits() && - "Can only promote stores to same size type"); - Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value); - SDValue Result = - DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - Alignment, MMOFlags, AAInfo); - ReplaceNode(SDValue(Node, 0), Result); - break; - } - } - return; - } + unsigned Alignment = ST->getAlignment(); + MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); + AAMDNodes AAInfo = ST->getAAInfo(); + + if (!ST->isTruncatingStore()) { + DEBUG(dbgs() << "Legalizing store operation\n"); + if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { + ReplaceNode(ST, OptStore); + return; + } + + SDValue Value = ST->getValue(); + MVT VT = Value.getSimpleValueType(); + switch (TLI.getOperationAction(ISD::STORE, VT)) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Legal: { + // If this is an unaligned store and the target doesn't support it, + // expand it. 
+ EVT MemVT = ST->getMemoryVT(); + unsigned AS = ST->getAddressSpace(); + unsigned Align = ST->getAlignment(); + const DataLayout &DL = DAG.getDataLayout(); + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { + DEBUG(dbgs() << "Expanding unsupported unaligned store\n"); + SDValue Result = TLI.expandUnalignedStore(ST, DAG); + ReplaceNode(SDValue(ST, 0), Result); + } else + DEBUG(dbgs() << "Legal store\n"); + break; + } + case TargetLowering::Custom: { + DEBUG(dbgs() << "Trying custom lowering\n"); + SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Res && Res != SDValue(Node, 0)) + ReplaceNode(SDValue(Node, 0), Res); + return; + } + case TargetLowering::Promote: { + MVT NVT = TLI.getTypeToPromoteTo(ISD::STORE, VT); + assert(NVT.getSizeInBits() == VT.getSizeInBits() && + "Can only promote stores to same size type"); + Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value); + SDValue Result = + DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + Alignment, MMOFlags, AAInfo); + ReplaceNode(SDValue(Node, 0), Result); + break; + } + } + return; + } + + DEBUG(dbgs() << "Legalizing truncating store operations\n"); + SDValue Value = ST->getValue(); + EVT StVT = ST->getMemoryVT(); + unsigned StWidth = StVT.getSizeInBits(); + auto &DL = DAG.getDataLayout(); + + if (StWidth != StVT.getStoreSizeInBits()) { + // Promote to a byte-sized store with upper bits zero if not + // storing an integral number of bytes. For example, promote + // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), + StVT.getStoreSizeInBits()); + Value = DAG.getZeroExtendInReg(Value, dl, StVT); + SDValue Result = + DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), NVT, + Alignment, MMOFlags, AAInfo); + ReplaceNode(SDValue(Node, 0), Result); + } else if (StWidth & (StWidth - 1)) { + // If not storing a power-of-2 number of bits, expand as two stores. 
+ assert(!StVT.isVector() && "Unsupported truncstore!"); + unsigned RoundWidth = 1 << Log2_32(StWidth); + assert(RoundWidth < StWidth); + unsigned ExtraWidth = StWidth - RoundWidth; + assert(ExtraWidth < RoundWidth); + assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && + "Store size not an integral number of bytes!"); + EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); + EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); + SDValue Lo, Hi; + unsigned IncrementSize; + + if (DL.isLittleEndian()) { + // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16) + // Store the bottom RoundWidth bits. + Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + RoundVT, Alignment, MMOFlags, AAInfo); + + // Store the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, dl, + Ptr.getValueType())); + Hi = DAG.getNode( + ISD::SRL, dl, Value.getValueType(), Value, + DAG.getConstant(RoundWidth, dl, + TLI.getShiftAmountTy(Value.getValueType(), DL))); + Hi = DAG.getTruncStore( + Chain, dl, Hi, Ptr, + ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, + MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); } else { - SDValue Value = ST->getValue(); - - EVT StVT = ST->getMemoryVT(); - unsigned StWidth = StVT.getSizeInBits(); - auto &DL = DAG.getDataLayout(); - - if (StWidth != StVT.getStoreSizeInBits()) { - // Promote to a byte-sized store with upper bits zero if not - // storing an integral number of bytes. 
For example, promote - // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) - EVT NVT = EVT::getIntegerVT(*DAG.getContext(), - StVT.getStoreSizeInBits()); - Value = DAG.getZeroExtendInReg(Value, dl, StVT); - SDValue Result = - DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), NVT, - Alignment, MMOFlags, AAInfo); - ReplaceNode(SDValue(Node, 0), Result); - } else if (StWidth & (StWidth - 1)) { - // If not storing a power-of-2 number of bits, expand as two stores. - assert(!StVT.isVector() && "Unsupported truncstore!"); - unsigned RoundWidth = 1 << Log2_32(StWidth); - assert(RoundWidth < StWidth); - unsigned ExtraWidth = StWidth - RoundWidth; - assert(ExtraWidth < RoundWidth); - assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && - "Store size not an integral number of bytes!"); - EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); - EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); - SDValue Lo, Hi; - unsigned IncrementSize; - - if (DL.isLittleEndian()) { - // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16) - // Store the bottom RoundWidth bits. - Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - RoundVT, Alignment, MMOFlags, AAInfo); - - // Store the remaining ExtraWidth bits. - IncrementSize = RoundWidth / 8; - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, - Ptr.getValueType())); - Hi = DAG.getNode( - ISD::SRL, dl, Value.getValueType(), Value, - DAG.getConstant(RoundWidth, dl, - TLI.getShiftAmountTy(Value.getValueType(), DL))); - Hi = DAG.getTruncStore( - Chain, dl, Hi, Ptr, - ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, - MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); - } else { - // Big endian - avoid unaligned stores. - // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X - // Store the top RoundWidth bits. 
- Hi = DAG.getNode( - ISD::SRL, dl, Value.getValueType(), Value, - DAG.getConstant(ExtraWidth, dl, - TLI.getShiftAmountTy(Value.getValueType(), DL))); - Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), - RoundVT, Alignment, MMOFlags, AAInfo); - - // Store the remaining ExtraWidth bits. - IncrementSize = RoundWidth / 8; - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, - Ptr.getValueType())); - Lo = DAG.getTruncStore( - Chain, dl, Value, Ptr, - ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, - MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); - } + // Big endian - avoid unaligned stores. + // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X + // Store the top RoundWidth bits. + Hi = DAG.getNode( + ISD::SRL, dl, Value.getValueType(), Value, + DAG.getConstant(ExtraWidth, dl, + TLI.getShiftAmountTy(Value.getValueType(), DL))); + Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), + RoundVT, Alignment, MMOFlags, AAInfo); - // The order of the stores doesn't matter. - SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); - ReplaceNode(SDValue(Node, 0), Result); + // Store the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, dl, + Ptr.getValueType())); + Lo = DAG.getTruncStore( + Chain, dl, Value, Ptr, + ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, + MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); + } + + // The order of the stores doesn't matter. 
+ SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + ReplaceNode(SDValue(Node, 0), Result); + } else { + switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Legal: { + EVT MemVT = ST->getMemoryVT(); + unsigned AS = ST->getAddressSpace(); + unsigned Align = ST->getAlignment(); + // If this is an unaligned store and the target doesn't support it, + // expand it. + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { + SDValue Result = TLI.expandUnalignedStore(ST, DAG); + ReplaceNode(SDValue(ST, 0), Result); + } + break; + } + case TargetLowering::Custom: { + SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Res && Res != SDValue(Node, 0)) + ReplaceNode(SDValue(Node, 0), Res); + return; + } + case TargetLowering::Expand: + assert(!StVT.isVector() && + "Vector Stores are handled in LegalizeVectorOps"); + + SDValue Result; + + // TRUNCSTORE:i16 i32 -> STORE i16 + if (TLI.isTypeLegal(StVT)) { + Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value); + Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + Alignment, MMOFlags, AAInfo); } else { - switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { - default: llvm_unreachable("This action is not supported yet!"); - case TargetLowering::Legal: { - EVT MemVT = ST->getMemoryVT(); - unsigned AS = ST->getAddressSpace(); - unsigned Align = ST->getAlignment(); - // If this is an unaligned store and the target doesn't support it, - // expand it. 
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { - SDValue Result = TLI.expandUnalignedStore(ST, DAG); - ReplaceNode(SDValue(ST, 0), Result); - } - break; - } - case TargetLowering::Custom: { - SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Res && Res != SDValue(Node, 0)) - ReplaceNode(SDValue(Node, 0), Res); - return; - } - case TargetLowering::Expand: - assert(!StVT.isVector() && - "Vector Stores are handled in LegalizeVectorOps"); - - // TRUNCSTORE:i16 i32 -> STORE i16 - assert(TLI.isTypeLegal(StVT) && - "Do not know how to expand this store!"); - Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value); - SDValue Result = - DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - Alignment, MMOFlags, AAInfo); - ReplaceNode(SDValue(Node, 0), Result); - break; - } + // The in-memory type isn't legal. Truncate to the type it would promote + // to, and then do a truncstore. + Value = DAG.getNode(ISD::TRUNCATE, dl, + TLI.getTypeToTransformTo(*DAG.getContext(), StVT), + Value); + Result = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + StVT, Alignment, MMOFlags, AAInfo); } + + ReplaceNode(SDValue(Node, 0), Result); + break; } + } } void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { @@ -611,6 +656,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { ISD::LoadExtType ExtType = LD->getExtensionType(); if (ExtType == ISD::NON_EXTLOAD) { + DEBUG(dbgs() << "Legalizing non-extending load operation\n"); MVT VT = Node->getSimpleValueType(0); SDValue RVal = SDValue(Node, 0); SDValue RChain = SDValue(Node, 1); @@ -629,13 +675,13 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { } break; } - case TargetLowering::Custom: { + case TargetLowering::Custom: if (SDValue Res = TLI.LowerOperation(RVal, DAG)) { RVal = Res; RChain = Res.getValue(1); } break; - } + case TargetLowering::Promote: { MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); assert(NVT.getSizeInBits() == VT.getSizeInBits() && 
@@ -660,6 +706,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { return; } + DEBUG(dbgs() << "Legalizing extending load operation\n"); EVT SrcVT = LD->getMemoryVT(); unsigned SrcWidth = SrcVT.getSizeInBits(); unsigned Alignment = LD->getAlignment(); @@ -795,7 +842,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { case TargetLowering::Custom: isCustom = true; LLVM_FALLTHROUGH; - case TargetLowering::Legal: { + case TargetLowering::Legal: Value = SDValue(Node, 0); Chain = SDValue(Node, 1); @@ -816,8 +863,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { } } break; - } - case TargetLowering::Expand: + + case TargetLowering::Expand: { EVT DestVT = Node->getValueType(0); if (!TLI.isLoadExtLegal(ISD::EXTLOAD, DestVT, SrcVT)) { // If the source type is not legal, see if there is a legal extload to @@ -883,6 +930,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Chain = Result.getValue(1); break; } + } } // Since loads produce two values, make sure to remember that we legalized @@ -907,6 +955,7 @@ getStrictFPOpcodeAction(const TargetLowering &TLI, unsigned Opcode, EVT VT) { case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break; case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break; case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break; + case ISD::STRICT_FMA: EqOpc = ISD::FMA; break; case ISD::STRICT_FSIN: EqOpc = ISD::FSIN; break; case ISD::STRICT_FCOS: EqOpc = ISD::FCOS; break; case ISD::STRICT_FEXP: EqOpc = ISD::FEXP; break; @@ -932,7 +981,9 @@ getStrictFPOpcodeAction(const TargetLowering &TLI, unsigned Opcode, EVT VT) { void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG)); - if (Node->getOpcode() == ISD::TargetConstant) // Allow illegal target nodes. + // Allow illegal target nodes and illegal registers. 
+ if (Node->getOpcode() == ISD::TargetConstant || + Node->getOpcode() == ISD::Register) return; #ifndef NDEBUG @@ -946,7 +997,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) == TargetLowering::TypeLegal || TLI.isTypeLegal(Op.getValueType()) || - Op.getOpcode() == ISD::TargetConstant) && + Op.getOpcode() == ISD::TargetConstant || + Op.getOpcode() == ISD::Register) && "Unexpected illegal type!"); #endif @@ -983,11 +1035,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Action = TLI.getOperationAction(Node->getOpcode(), InnerType); break; } - case ISD::ATOMIC_STORE: { + case ISD::ATOMIC_STORE: Action = TLI.getOperationAction(Node->getOpcode(), Node->getOperand(2).getValueType()); break; - } case ISD::SELECT_CC: case ISD::SETCC: case ISD::BR_CC: { @@ -1072,6 +1123,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { } break; case ISD::STRICT_FSQRT: + case ISD::STRICT_FMA: case ISD::STRICT_FPOW: case ISD::STRICT_FPOWI: case ISD::STRICT_FSIN: @@ -1090,7 +1142,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Action = getStrictFPOpcodeAction(TLI, Node->getOpcode(), Node->getValueType(0)); break; - default: if (Node->getOpcode() >= ISD::BUILTIN_OP_END) { Action = TargetLowering::Legal; @@ -1141,8 +1192,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { if (SAO != Op2) NewNode = DAG.UpdateNodeOperands(Node, Op0, Op1, SAO); } + break; } - break; } if (NewNode != Node) { @@ -1151,8 +1202,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { } switch (Action) { case TargetLowering::Legal: + DEBUG(dbgs() << "Legal node: nothing to do\n"); return; - case TargetLowering::Custom: { + case TargetLowering::Custom: + DEBUG(dbgs() << "Trying custom legalization\n"); // FIXME: The handling for custom lowering with multiple results is // a complete mess. 
if (SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG)) { @@ -1160,6 +1213,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { return; if (Node->getNumValues() == 1) { + DEBUG(dbgs() << "Successfully custom legalized node\n"); // We can just directly replace this node with the lowered value. ReplaceNode(SDValue(Node, 0), Res); return; @@ -1168,11 +1222,12 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { SmallVector<SDValue, 8> ResultVals; for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) ResultVals.push_back(Res.getValue(i)); + DEBUG(dbgs() << "Successfully custom legalized node\n"); ReplaceNode(Node, ResultVals.data()); return; } + DEBUG(dbgs() << "Could not custom legalize node\n"); LLVM_FALLTHROUGH; - } case TargetLowering::Expand: if (ExpandNode(Node)) return; @@ -1198,13 +1253,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::CALLSEQ_START: case ISD::CALLSEQ_END: break; - case ISD::LOAD: { + case ISD::LOAD: return LegalizeLoadOps(Node); - } - case ISD::STORE: { + case ISD::STORE: return LegalizeStoreOps(Node); } - } } SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { @@ -1240,7 +1293,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { // If the index is dependent on the store we will introduce a cycle when // creating the load (the load uses the index, and by replacing the chain // we will make the index dependent on the load). Also, the store might be - // dependent on the extractelement and introduce a cycle when creating + // dependent on the extractelement and introduce a cycle when creating // the load. 
if (SDNode::hasPredecessorHelper(ST, Visited, Worklist) || ST->hasPredecessor(Op.getNode())) @@ -1361,22 +1414,6 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo); } -namespace { -/// Keeps track of state when getting the sign of a floating-point value as an -/// integer. -struct FloatSignAsInt { - EVT FloatVT; - SDValue Chain; - SDValue FloatPtr; - SDValue IntPtr; - MachinePointerInfo IntPointerInfo; - MachinePointerInfo FloatPointerInfo; - SDValue IntValue; - APInt SignMask; - uint8_t SignBit; -}; -} - /// Bitcast a floating-point value to an integer value. Only bitcast the part /// containing the sign bit if the target has no integer value capable of /// holding all bits of the floating-point value. @@ -1753,8 +1790,8 @@ ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG, // We do this in two phases; first to check the legality of the shuffles, // and next, assuming that all shuffles are legal, to create the new nodes. for (int Phase = 0; Phase < 2; ++Phase) { - SmallVector<std::pair<SDValue, SmallVector<int, 16> >, 16> IntermedVals, - NewIntermedVals; + SmallVector<std::pair<SDValue, SmallVector<int, 16>>, 16> IntermedVals, + NewIntermedVals; for (unsigned i = 0; i < NumElems; ++i) { SDValue V = Node->getOperand(i); if (V.isUndef()) @@ -1977,10 +2014,10 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, // isTailCall may be true since the callee does not reference caller stack // frame. Check if it's in the right position and that the return types match. 
SDValue TCChain = InChain; - const Function *F = DAG.getMachineFunction().getFunction(); + const Function &F = DAG.getMachineFunction().getFunction(); bool isTailCall = TLI.isInTailCallPosition(DAG, Node, TCChain) && - (RetTy == F->getReturnType() || F->getReturnType()->isVoidTy()); + (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy()); if (isTailCall) InChain = TCChain; @@ -1996,10 +2033,13 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); - if (!CallInfo.second.getNode()) + if (!CallInfo.second.getNode()) { + DEBUG(dbgs() << "Created tailcall: "; DAG.getRoot().dump()); // It's a tailcall, return the chain (which is the DAG root). return DAG.getRoot(); + } + DEBUG(dbgs() << "Created libcall: "; CallInfo.first.dump()); return CallInfo.first; } @@ -2285,9 +2325,10 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, EVT DestVT, const SDLoc &dl) { // TODO: Should any fast-math-flags be set for the created nodes? - + DEBUG(dbgs() << "Legalizing INT_TO_FP\n"); if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) { - // simple 32-bit [signed|unsigned] integer to float/double expansion + DEBUG(dbgs() << "32-bit [signed|unsigned] integer to float/double " + "expansion\n"); // Get the stack frame index of a 8 byte buffer. SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64); @@ -2352,6 +2393,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, // and in all alternate rounding modes. // TODO: Generalize this for use with other types. 
if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f64) { + DEBUG(dbgs() << "Converting unsigned i64 to f64\n"); SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, MVT::i64); SDValue TwoP84PlusTwoP52 = @@ -2372,9 +2414,9 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub); } - // Implementation of unsigned i64 to f32. // TODO: Generalize this for use with other types. if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) { + DEBUG(dbgs() << "Converting unsigned i64 to f32\n"); // For unsigned conversions, convert them to signed conversions using the // algorithm from the x86_64 __floatundidf in compiler_rt. if (!isSigned) { @@ -2498,7 +2540,7 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, unsigned OpToUse = 0; // Scan for the appropriate larger type to use. - while (1) { + while (true) { NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT().SimpleTy+1); assert(NewInTy.isInteger() && "Ran out of possibilities!"); @@ -2539,7 +2581,7 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, unsigned OpToUse = 0; // Scan for the appropriate larger type to use. - while (1) { + while (true) { NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT().SimpleTy+1); assert(NewOutTy.isInteger() && "Ran out of possibilities!"); @@ -2559,7 +2601,6 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, // Otherwise, try a larger type. } - // Okay, we found the operation and type to use. 
SDValue Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp); @@ -2745,7 +2786,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, return DAG.getNode(ISD::CTLZ, dl, Op.getValueType(), Op); case ISD::CTLZ: { EVT VT = Op.getValueType(); - unsigned len = VT.getSizeInBits(); + unsigned Len = VT.getSizeInBits(); if (TLI.isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) { EVT SetCCVT = getSetCCResultType(VT); @@ -2753,7 +2794,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, SDValue Zero = DAG.getConstant(0, dl, VT); SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ); return DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero, - DAG.getConstant(len, dl, VT), CTLZ); + DAG.getConstant(Len, dl, VT), CTLZ); } // for now, we do this: @@ -2766,7 +2807,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, // // Ref: "Hacker's Delight" by Henry Warren EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); - for (unsigned i = 0; (1U << i) <= (len / 2); ++i) { + for (unsigned i = 0; (1U << i) <= (Len / 2); ++i) { SDValue Tmp3 = DAG.getConstant(1ULL << i, dl, ShVT); Op = DAG.getNode(ISD::OR, dl, VT, Op, DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3)); @@ -2778,11 +2819,22 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, // This trivially expands to CTTZ. 
return DAG.getNode(ISD::CTTZ, dl, Op.getValueType(), Op); case ISD::CTTZ: { + EVT VT = Op.getValueType(); + unsigned Len = VT.getSizeInBits(); + + if (TLI.isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) { + EVT SetCCVT = getSetCCResultType(VT); + SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op); + SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ); + return DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero, + DAG.getConstant(Len, dl, VT), CTTZ); + } + // for now, we use: { return popcount(~x & (x - 1)); } // unless the target has ctlz but not ctpop, in which case we use: // { return 32 - nlz(~x & (x-1)); } // Ref: "Hacker's Delight" by Henry Warren - EVT VT = Op.getValueType(); SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT), DAG.getNode(ISD::SUB, dl, VT, Op, @@ -2799,6 +2851,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, } bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { + DEBUG(dbgs() << "Trying to expand node\n"); SmallVector<SDValue, 8> Results; SDLoc dl(Node); SDValue Tmp1, Tmp2, Tmp3, Tmp4; @@ -2983,8 +3036,6 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // NOTE: we could fall back on load/store here too for targets without // SRA. However, it is doubtful that any exist. 
EVT ShiftAmountTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); - if (VT.isVector()) - ShiftAmountTy = VT; unsigned BitsDiff = VT.getScalarSizeInBits() - ExtraVT.getScalarSizeInBits(); SDValue ShiftCst = DAG.getConstant(BitsDiff, dl, ShiftAmountTy); @@ -3062,10 +3113,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::INSERT_SUBVECTOR: Results.push_back(ExpandInsertToVectorThroughStack(SDValue(Node, 0))); break; - case ISD::CONCAT_VECTORS: { + case ISD::CONCAT_VECTORS: Results.push_back(ExpandVectorBuildThroughStack(Node)); break; - } case ISD::SCALAR_TO_VECTOR: Results.push_back(ExpandSCALAR_TO_VECTOR(Node)); break; @@ -3083,14 +3133,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDValue Op0 = Node->getOperand(0); SDValue Op1 = Node->getOperand(1); if (!TLI.isTypeLegal(EltVT)) { - EVT NewEltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT); // BUILD_VECTOR operands are allowed to be wider than the element type. // But if NewEltVT is smaller that EltVT the BUILD_VECTOR does not accept // it. if (NewEltVT.bitsLT(EltVT)) { - // Convert shuffle node. // If original node was v4i64 and the new EltVT is i32, // cast operands to v8i32 and re-build the mask. 
@@ -3261,6 +3309,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { } break; case ISD::FP_TO_FP16: + DEBUG(dbgs() << "Legalizing FP_TO_FP16\n"); if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) { SDValue Op = Node->getOperand(0); MVT SVT = Op.getSimpleValueType(); @@ -3457,7 +3506,6 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) // Sub: // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) - // SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE); SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE); SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign, @@ -3666,10 +3714,15 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp2.getOperand(0), Tmp2.getOperand(1), Node->getOperand(2)); } else { - // We test only the i1 bit. Skip the AND if UNDEF. - Tmp3 = (Tmp2.isUndef()) ? Tmp2 : - DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2, - DAG.getConstant(1, dl, Tmp2.getValueType())); + // We test only the i1 bit. Skip the AND if UNDEF or another AND. + if (Tmp2.isUndef() || + (Tmp2.getOpcode() == ISD::AND && + isa<ConstantSDNode>(Tmp2.getOperand(1)) && + dyn_cast<ConstantSDNode>(Tmp2.getOperand(1))->getZExtValue() == 1)) + Tmp3 = Tmp2; + else + Tmp3 = DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2, + DAG.getConstant(1, dl, Tmp2.getValueType())); Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1, DAG.getCondCode(ISD::SETNE), Tmp3, DAG.getConstant(0, dl, Tmp3.getValueType()), @@ -3865,17 +3918,20 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { } // Replace the original node with the legalized result. 
- if (Results.empty()) + if (Results.empty()) { + DEBUG(dbgs() << "Cannot expand node\n"); return false; + } + DEBUG(dbgs() << "Succesfully expanded node\n"); ReplaceNode(Node, Results.data()); return true; } void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { + DEBUG(dbgs() << "Trying to convert node to libcall\n"); SmallVector<SDValue, 8> Results; SDLoc dl(Node); - SDValue Tmp1, Tmp2, Tmp3, Tmp4; unsigned Opc = Node->getOpcode(); switch (Opc) { case ISD::ATOMIC_FENCE: { @@ -4057,6 +4113,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::REM_PPCF128)); break; case ISD::FMA: + case ISD::STRICT_FMA: Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64, RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128)); @@ -4126,8 +4183,11 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { } // Replace the original node with the legalized result. - if (!Results.empty()) + if (!Results.empty()) { + DEBUG(dbgs() << "Successfully converted node to libcall\n"); ReplaceNode(Node, Results.data()); + } else + DEBUG(dbgs() << "Could not convert node to libcall\n"); } // Determine the vector type to use in place of an original scalar element when @@ -4141,6 +4201,7 @@ static MVT getPromotedVectorElementType(const TargetLowering &TLI, } void SelectionDAGLegalize::PromoteNode(SDNode *Node) { + DEBUG(dbgs() << "Trying to promote node\n"); SmallVector<SDValue, 8> Results; MVT OVT = Node->getSimpleValueType(0); if (Node->getOpcode() == ISD::UINT_TO_FP || @@ -4369,7 +4430,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::FREM: case ISD::FMINNUM: case ISD::FMAXNUM: - case ISD::FPOW: { + case ISD::FPOW: Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, @@ -4377,8 +4438,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { 
Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp3, DAG.getIntPtrConstant(0, dl))); break; - } - case ISD::FMA: { + case ISD::FMA: Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); Tmp3 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(2)); @@ -4387,7 +4447,6 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, Tmp3), DAG.getIntPtrConstant(0, dl))); break; - } case ISD::FCOPYSIGN: case ISD::FPOWI: { Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); @@ -4419,13 +4478,12 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::FLOG10: case ISD::FABS: case ISD::FEXP: - case ISD::FEXP2: { + case ISD::FEXP2: Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp2, DAG.getIntPtrConstant(0, dl))); break; - } case ISD::BUILD_VECTOR: { MVT EltVT = OVT.getVectorElementType(); MVT NewEltVT = NVT.getVectorElementType(); @@ -4579,8 +4637,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { } // Replace the original node with the legalized result. - if (!Results.empty()) + if (!Results.empty()) { + DEBUG(dbgs() << "Successfully promoted node\n"); ReplaceNode(Node, Results.data()); + } else + DEBUG(dbgs() << "Could not promote node\n"); } /// This is the entry point for the file. @@ -4602,7 +4663,7 @@ void SelectionDAG::Legalize() { // nodes with their original operands intact. Legalization can produce // new nodes which may themselves need to be legalized. Iterate until all // nodes have been legalized. 
- for (;;) { + while (true) { bool AnyLegalized = false; for (auto NI = allnodes_end(); NI != allnodes_begin();) { --NI; diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 75fec7bd1d48..29f0bb475b08 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -40,8 +40,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { SDValue Res = SDValue(); // See if the target wants to custom expand this node. - if (CustomLowerNode(N, N->getValueType(ResNo), true)) + if (CustomLowerNode(N, N->getValueType(ResNo), true)) { + DEBUG(dbgs() << "Node has been custom expanded, done\n"); return; + } switch (N->getOpcode()) { default: @@ -568,10 +570,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) { SDValue Mask = N->getOperand(0); - EVT OpTy = N->getOperand(1).getValueType(); - // Promote all the way up to the canonical SetCC type. - Mask = PromoteTargetBoolean(Mask, OpTy); SDValue LHS = GetPromotedInteger(N->getOperand(1)); SDValue RHS = GetPromotedInteger(N->getOperand(2)); return DAG.getNode(ISD::VSELECT, SDLoc(N), @@ -773,7 +772,30 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) { SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo) { if (ResNo == 1) return PromoteIntRes_Overflow(N); - llvm_unreachable("Not implemented"); + + // We need to sign-extend the operands so the carry value computed by the + // wide operation will be equivalent to the carry value computed by the + // narrow operation. + // An ADDCARRY can generate carry only if any of the operands has its + // most significant bit set. Sign extension propagates the most significant + // bit into the higher bits which means the extra bit that the narrow + // addition would need (i.e. 
the carry) will be propagated through the higher + // bits of the wide addition. + // A SUBCARRY can generate borrow only if LHS < RHS and this property will be + // preserved by sign extension. + SDValue LHS = SExtPromotedInteger(N->getOperand(0)); + SDValue RHS = SExtPromotedInteger(N->getOperand(1)); + + EVT ValueVTs[] = {LHS.getValueType(), N->getValueType(1)}; + + // Do the arithmetic in the wide type. + SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N), DAG.getVTList(ValueVTs), + LHS, RHS, N->getOperand(2)); + + // Update the users of the original carry/borrow value. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + + return SDValue(Res.getNode(), 0); } SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { @@ -885,8 +907,10 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG); dbgs() << "\n"); SDValue Res = SDValue(); - if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) + if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) { + DEBUG(dbgs() << "Node has been custom lowered, done\n"); return false; + } switch (N->getOpcode()) { default: @@ -1206,24 +1230,23 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, // When the data operand has illegal type, we should legalize the data // operand first. The mask will be promoted/splitted/widened according to // the data operand type. - if (TLI.isTypeLegal(DataVT)) + if (TLI.isTypeLegal(DataVT)) { Mask = PromoteTargetBoolean(Mask, DataVT); - else { - if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) - return PromoteIntOp_MSTORE(N, 3); - - else if (getTypeAction(DataVT) == TargetLowering::TypeWidenVector) - return WidenVecOp_MSTORE(N, 3); - - else { - assert (getTypeAction(DataVT) == TargetLowering::TypeSplitVector); - return SplitVecOp_MSTORE(N, 3); - } + // Update in place. 
+ SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end()); + NewOps[2] = Mask; + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); } + + if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) + return PromoteIntOp_MSTORE(N, 3); + if (getTypeAction(DataVT) == TargetLowering::TypeWidenVector) + return WidenVecOp_MSTORE(N, 3); + assert (getTypeAction(DataVT) == TargetLowering::TypeSplitVector); + return SplitVecOp_MSTORE(N, 3); } else { // Data operand assert(OpNo == 3 && "Unexpected operand for promotion"); DataOp = GetPromotedInteger(DataOp); - Mask = PromoteTargetBoolean(Mask, DataOp.getValueType()); TruncateStore = true; } @@ -1250,6 +1273,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N, // The Mask EVT DataVT = N->getValueType(0); NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); + } else if (OpNo == 4) { + // Need to sign extend the index since the bits will likely be used. + NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo)); } else NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo)); @@ -1270,6 +1296,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, // The Mask EVT DataVT = N->getValue().getValueType(); NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); + } else if (OpNo == 4) { + // Need to sign extend the index since the bits will likely be used. + NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo)); } else NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo)); return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); @@ -3224,8 +3253,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // Increment the pointer to the other half. 
unsigned IncrementSize = NVT.getSizeInBits()/8; - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); Hi = DAG.getTruncStore( Ch, dl, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); @@ -3260,8 +3288,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { MMOFlags, AAInfo); // Increment the pointer to the other half. - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); // Store the lowest ExcessBits bits in the second half. Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), @@ -3462,7 +3489,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); assert(NOutVT.isVector() && "This type must be promoted to a vector type"); - EVT InElemTy = OutVT.getVectorElementType(); EVT OutElemTy = NOutVT.getVectorElementType(); unsigned NumElem = N->getOperand(0).getValueType().getVectorNumElements(); @@ -3471,15 +3497,36 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { assert(NumElem * NumOperands == NumOutElem && "Unexpected number of elements"); + // If the input type is legal and we can promote it to a legal type with the + // same element size, go ahead do that to create a new concat. 
+ if (getTypeAction(N->getOperand(0).getValueType()) == + TargetLowering::TypeLegal) { + EVT InPromotedTy = EVT::getVectorVT(*DAG.getContext(), OutElemTy, NumElem); + if (TLI.isTypeLegal(InPromotedTy)) { + SmallVector<SDValue, 8> Ops(NumOperands); + for (unsigned i = 0; i < NumOperands; ++i) { + Ops[i] = DAG.getNode(ISD::ANY_EXTEND, dl, InPromotedTy, + N->getOperand(i)); + } + return DAG.getNode(ISD::CONCAT_VECTORS, dl, NOutVT, Ops); + } + } + // Take the elements from the first vector. SmallVector<SDValue, 8> Ops(NumOutElem); for (unsigned i = 0; i < NumOperands; ++i) { SDValue Op = N->getOperand(i); + if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteInteger) + Op = GetPromotedInteger(Op); + EVT SclrTy = Op.getValueType().getVectorElementType(); + assert(NumElem == Op.getValueType().getVectorNumElements() && + "Unexpected number of elements"); + for (unsigned j = 0; j < NumElem; ++j) { SDValue Ext = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, InElemTy, Op, + ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, Op, DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - Ops[i * NumElem + j] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext); + Ops[i * NumElem + j] = DAG.getAnyExtOrTrunc(Ext, dl, OutElemTy); } } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 001eed9fb8f6..b60d7bca498a 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -14,7 +14,9 @@ //===----------------------------------------------------------------------===// #include "LegalizeTypes.h" +#include "SDNodeDbgValue.h" #include "llvm/ADT/SetVector.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/Support/CommandLine.h" @@ -222,15 +224,21 @@ bool DAGTypeLegalizer::run() { assert(N->getNodeId() == ReadyToProcess && "Node should be ready if on worklist!"); - if (IgnoreNodeResults(N)) + DEBUG(dbgs() << 
"Legalizing node: "; N->dump()); + if (IgnoreNodeResults(N)) { + DEBUG(dbgs() << "Ignoring node results\n"); goto ScanOperands; + } // Scan the values produced by the node, checking to see if any result // types are illegal. for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) { EVT ResultVT = N->getValueType(i); + DEBUG(dbgs() << "Analyzing result type: " << + ResultVT.getEVTString() << "\n"); switch (getTypeAction(ResultVT)) { case TargetLowering::TypeLegal: + DEBUG(dbgs() << "Legal result type\n"); break; // The following calls must take care of *all* of the node's results, // not just the illegal result they were passed (this includes results @@ -287,9 +295,12 @@ ScanOperands: if (IgnoreNodeResults(N->getOperand(i).getNode())) continue; - EVT OpVT = N->getOperand(i).getValueType(); + const auto Op = N->getOperand(i); + DEBUG(dbgs() << "Analyzing operand: "; Op.dump()); + EVT OpVT = Op.getValueType(); switch (getTypeAction(OpVT)) { case TargetLowering::TypeLegal: + DEBUG(dbgs() << "Legal operand\n"); continue; // The following calls must either replace all of the node's results // using ReplaceValueWith, and return "false"; or update the node's @@ -832,6 +843,18 @@ void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo, AnalyzeNewValue(Lo); AnalyzeNewValue(Hi); + // Transfer debug values. Don't invalidate the source debug value until it's + // been transferred to the high and low bits. + if (DAG.getDataLayout().isBigEndian()) { + DAG.transferDbgValues(Op, Hi, 0, Hi.getValueSizeInBits(), false); + DAG.transferDbgValues(Op, Lo, Hi.getValueSizeInBits(), + Lo.getValueSizeInBits()); + } else { + DAG.transferDbgValues(Op, Lo, 0, Lo.getValueSizeInBits(), false); + DAG.transferDbgValues(Op, Hi, Lo.getValueSizeInBits(), + Hi.getValueSizeInBits()); + } + // Remember that this is the result of the node. 
std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op]; assert(!Entry.first.getNode() && "Node already expanded"); @@ -1002,8 +1025,13 @@ bool DAGTypeLegalizer::CustomWidenLowerNode(SDNode *N, EVT VT) { // Update the widening map. assert(Results.size() == N->getNumValues() && "Custom lowering returned the wrong number of results!"); - for (unsigned i = 0, e = Results.size(); i != e; ++i) - SetWidenedVector(SDValue(N, i), Results[i]); + for (unsigned i = 0, e = Results.size(); i != e; ++i) { + // If this is a chain output just replace it. + if (Results[i].getValueType() == MVT::Other) + ReplaceValueWith(SDValue(N, i), Results[i]); + else + SetWidenedVector(SDValue(N, i), Results[i]); + } return true; } @@ -1117,23 +1145,6 @@ SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT ValVT) { return DAG.getNode(ExtendCode, dl, BoolVT, Bool); } -/// Widen the given target boolean to a target boolean of the given type. -/// The boolean vector is widened and then promoted to match the target boolean -/// type of the given ValVT. -SDValue DAGTypeLegalizer::WidenTargetBoolean(SDValue Bool, EVT ValVT, - bool WithZeroes) { - SDLoc dl(Bool); - EVT BoolVT = Bool.getValueType(); - - assert(ValVT.getVectorNumElements() > BoolVT.getVectorNumElements() && - TLI.isTypeLegal(ValVT) && - "Unexpected types in WidenTargetBoolean"); - EVT WideVT = EVT::getVectorVT(*DAG.getContext(), BoolVT.getScalarType(), - ValVT.getVectorNumElements()); - Bool = ModifyToType(Bool, WideVT, WithZeroes); - return PromoteTargetBoolean(Bool, ValVT); -} - /// Return the lower LoVT bits of Op in Lo and the upper HiVT bits in Hi. 
void DAGTypeLegalizer::SplitInteger(SDValue Op, EVT LoVT, EVT HiVT, @@ -1142,9 +1153,14 @@ void DAGTypeLegalizer::SplitInteger(SDValue Op, assert(LoVT.getSizeInBits() + HiVT.getSizeInBits() == Op.getValueSizeInBits() && "Invalid integer splitting!"); Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op); + unsigned ReqShiftAmountInBits = + Log2_32_Ceil(Op.getValueType().getSizeInBits()); + MVT ShiftAmountTy = + TLI.getScalarShiftAmountTy(DAG.getDataLayout(), Op.getValueType()); + if (ReqShiftAmountInBits > ShiftAmountTy.getSizeInBits()) + ShiftAmountTy = MVT::getIntegerVT(NextPowerOf2(ReqShiftAmountInBits)); Hi = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op, - DAG.getConstant(LoVT.getSizeInBits(), dl, - TLI.getPointerTy(DAG.getDataLayout()))); + DAG.getConstant(LoVT.getSizeInBits(), dl, ShiftAmountTy)); Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index c46d1b04804c..64cb80e0d853 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -18,9 +18,9 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Target/TargetLowering.h" namespace llvm { @@ -89,7 +89,8 @@ private: /// Pretend all of this node's results are legal. bool IgnoreNodeResults(SDNode *N) const { - return N->getOpcode() == ISD::TargetConstant; + return N->getOpcode() == ISD::TargetConstant || + N->getOpcode() == ISD::Register; } /// For integer nodes that are below legal width, this map indicates what @@ -182,10 +183,6 @@ private: SDValue PromoteTargetBoolean(SDValue Bool, EVT ValVT); - /// Modify Bit Vector to match SetCC result type of ValVT. - /// The bit vector is widened with zeroes when WithZeroes is true. 
- SDValue WidenTargetBoolean(SDValue Bool, EVT ValVT, bool WithZeroes = false); - void ReplaceValueWith(SDValue From, SDValue To); void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi); void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT, @@ -400,18 +397,22 @@ private: /// Given an operand Op of Float type, returns the integer if the Op is not /// supported in target HW and converted to the integer. /// The integer contains exactly the same bits as Op - only the type changed. - /// For example, if Op is an f32 which was softened to an i32, then this method - /// returns an i32, the bits of which coincide with those of Op. + /// For example, if Op is an f32 which was softened to an i32, then this + /// method returns an i32, the bits of which coincide with those of Op. /// If the Op can be efficiently supported in target HW or the operand must /// stay in a register, the Op is not converted to an integer. /// In that case, the given op is returned. SDValue GetSoftenedFloat(SDValue Op) { - SDValue &SoftenedOp = SoftenedFloats[Op]; - if (!SoftenedOp.getNode() && - isSimpleLegalType(Op.getValueType())) + auto Iter = SoftenedFloats.find(Op); + if (Iter == SoftenedFloats.end()) { + assert(isSimpleLegalType(Op.getValueType()) && + "Operand wasn't converted to integer?"); return Op; + } + + SDValue &SoftenedOp = Iter->second; + assert(SoftenedOp.getNode() && "Unconverted op in SoftenedFloats?"); RemapValue(SoftenedOp); - assert(SoftenedOp.getNode() && "Operand wasn't converted to integer?"); return SoftenedOp; } void SetSoftenedFloat(SDValue Op, SDValue Result); @@ -618,7 +619,6 @@ private: SDValue ScalarizeVecRes_SETCC(SDNode *N); SDValue ScalarizeVecRes_UNDEF(SDNode *N); SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N); - SDValue ScalarizeVecRes_VSETCC(SDNode *N); // Vector Operand Scalarization: <1 x ty> -> ty. 
bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo); @@ -727,7 +727,6 @@ private: SDValue WidenVecRes_SETCC(SDNode* N); SDValue WidenVecRes_UNDEF(SDNode *N); SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N); - SDValue WidenVecRes_VSETCC(SDNode* N); SDValue WidenVecRes_Ternary(SDNode *N); SDValue WidenVecRes_Binary(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index f3306151d864..993465ae9dc2 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -484,8 +484,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), Alignment, St->getMemOperand()->getFlags(), AAInfo); - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getPointerInfo().getWithOffset(IncrementSize), MinAlign(Alignment, IncrementSize), diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 9355dbe77f94..74970ab5792c 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -1,4 +1,4 @@ -//===-- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ---===// +//===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===// // // The LLVM Compiler Infrastructure // @@ -27,15 +27,34 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/Target/TargetLowering.h" 
+#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include <cassert> +#include <cstdint> +#include <iterator> +#include <utility> + using namespace llvm; namespace { + class VectorLegalizer { SelectionDAG& DAG; const TargetLowering &TLI; - bool Changed; // Keep track of whether anything changed + bool Changed = false; // Keep track of whether anything changed /// For nodes that are of legal width, and that have more than one use, this /// map indicates what regularized operand to use. This allows us to avoid @@ -128,12 +147,15 @@ class VectorLegalizer { SDValue PromoteFP_TO_INT(SDValue Op, bool isSigned); public: + VectorLegalizer(SelectionDAG& dag) : + DAG(dag), TLI(dag.getTargetLoweringInfo()) {} + /// \brief Begin legalizer the vector operations in the DAG. bool Run(); - VectorLegalizer(SelectionDAG& dag) : - DAG(dag), TLI(dag.getTargetLoweringInfo()), Changed(false) {} }; +} // end anonymous namespace + bool VectorLegalizer::Run() { // Before we start legalizing vector nodes, check if there are any vectors. 
bool HasVectors = false; @@ -475,10 +497,10 @@ SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op, bool isSigned) { "Can't promote a vector with multiple results!"); EVT VT = Op.getValueType(); - EVT NewVT; + EVT NewVT = VT; unsigned NewOpc; - while (1) { - NewVT = VT.widenIntegerVectorElementType(*DAG.getContext()); + while (true) { + NewVT = NewVT.widenIntegerVectorElementType(*DAG.getContext()); assert(NewVT.isSimple() && "Promoting to a non-simple vector type!"); if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewVT)) { NewOpc = ISD::FP_TO_SINT; @@ -490,12 +512,19 @@ SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op, bool isSigned) { } } - SDLoc loc(Op); - SDValue promoted = DAG.getNode(NewOpc, SDLoc(Op), NewVT, Op.getOperand(0)); - return DAG.getNode(ISD::TRUNCATE, SDLoc(Op), VT, promoted); + SDLoc dl(Op); + SDValue Promoted = DAG.getNode(NewOpc, dl, NewVT, Op.getOperand(0)); + + // Assert that the converted value fits in the original type. If it doesn't + // (eg: because the value being converted is too big), then the result of the + // original operation was undefined anyway, so the assert is still correct. + Promoted = DAG.getNode(Op->getOpcode() == ISD::FP_TO_UINT ? 
ISD::AssertZext + : ISD::AssertSext, + dl, NewVT, Promoted, + DAG.getValueType(VT.getScalarType())); + return DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted); } - SDValue VectorLegalizer::ExpandLoad(SDValue Op) { LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); @@ -503,7 +532,6 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { EVT SrcEltVT = SrcVT.getScalarType(); unsigned NumElem = SrcVT.getVectorNumElements(); - SDValue NewChain; SDValue Value; if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) { @@ -534,7 +562,6 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { unsigned Offset = 0; unsigned RemainingBytes = SrcVT.getStoreSize(); SmallVector<SDValue, 8> LoadVals; - while (RemainingBytes > 0) { SDValue ScalarLoad; unsigned LoadBytes = WideBytes; @@ -560,9 +587,8 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { RemainingBytes -= LoadBytes; Offset += LoadBytes; - BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, - DAG.getConstant(LoadBytes, dl, - BasePTR.getValueType())); + + BasePTR = DAG.getObjectPtrOffset(dl, BasePTR, LoadBytes); LoadVals.push_back(ScalarLoad.getValue(0)); LoadChains.push_back(ScalarLoad.getValue(1)); @@ -1117,8 +1143,6 @@ SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { return DAG.getBuildVector(VT, dl, Ops); } -} - bool SelectionDAG::LegalizeVectors() { return VectorLegalizer(*this).Run(); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 6aa3270883f0..8f2320f52a0f 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -243,7 +243,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) { // For instance, this happens on AArch64: v1i1 is illegal but v1i{8,16,32} // are widened to v8i8, v4i16, and v2i32, which is legal, because v1i64 is // legal and was not scalarized. 
- // See the similar logic in ScalarizeVecRes_VSETCC + // See the similar logic in ScalarizeVecRes_SETCC if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) { Op = GetScalarizedVector(Op); } else { @@ -307,7 +307,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) { SDLoc DL(N); // The vselect result and true/value operands needs scalarizing, but it's // not a given that the Cond does. For instance, in AVX512 v1i1 is legal. - // See the similar logic in ScalarizeVecRes_VSETCC + // See the similar logic in ScalarizeVecRes_SETCC if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) { Cond = GetScalarizedVector(Cond); } else { @@ -380,21 +380,6 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) { N->getOperand(4)); } -SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) { - assert(N->getValueType(0).isVector() == - N->getOperand(0).getValueType().isVector() && - "Scalar/Vector type mismatch"); - - if (N->getValueType(0).isVector()) return ScalarizeVecRes_VSETCC(N); - - SDValue LHS = GetScalarizedVector(N->getOperand(0)); - SDValue RHS = GetScalarizedVector(N->getOperand(1)); - SDLoc DL(N); - - // Turn it into a scalar SETCC. 
- return DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, N->getOperand(2)); -} - SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) { return DAG.getUNDEF(N->getValueType(0).getVectorElementType()); } @@ -408,7 +393,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) { return GetScalarizedVector(N->getOperand(Op)); } -SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) { +SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) { assert(N->getValueType(0).isVector() && N->getOperand(0).getValueType().isVector() && "Operand types must be vectors"); @@ -461,7 +446,8 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { N->dump(&DAG); dbgs() << "\n"; #endif - llvm_unreachable("Do not know how to scalarize this operator's operand!"); + report_fatal_error("Do not know how to scalarize this operator's " + "operand!\n"); case ISD::BITCAST: Res = ScalarizeVecOp_BITCAST(N); break; @@ -1068,34 +1054,57 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, if (CustomLowerNode(N, N->getValueType(0), true)) return; - // Spill the vector to the stack. + // Make the vector elements byte-addressable if they aren't already. EVT VecVT = Vec.getValueType(); EVT EltVT = VecVT.getVectorElementType(); + if (VecVT.getScalarSizeInBits() < 8) { + EltVT = MVT::i8; + VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, + VecVT.getVectorNumElements()); + Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec); + // Extend the element type to match if needed. + if (EltVT.bitsGT(Elt.getValueType())) + Elt = DAG.getNode(ISD::ANY_EXTEND, dl, EltVT, Elt); + } + + // Spill the vector to the stack. 
SDValue StackPtr = DAG.CreateStackTemporary(VecVT); - SDValue Store = - DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo()); + auto &MF = DAG.getMachineFunction(); + auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo); // Store the new element. This may be larger than the vector element type, // so use a truncating store. SDValue EltPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType); - Store = - DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT); + Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, + MachinePointerInfo::getUnknownStack(MF), EltVT); + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT); // Load the Lo part from the stack slot. - Lo = - DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo()); + Lo = DAG.getLoad(LoVT, dl, Store, StackPtr, PtrInfo); // Increment the pointer to the other part. - unsigned IncrementSize = Lo.getValueSizeInBits() / 8; + unsigned IncrementSize = LoVT.getSizeInBits() / 8; StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, DAG.getConstant(IncrementSize, dl, StackPtr.getValueType())); // Load the Hi part from the stack slot. - Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), + Hi = DAG.getLoad(HiVT, dl, Store, StackPtr, + PtrInfo.getWithOffset(IncrementSize), MinAlign(Alignment, IncrementSize)); + + // If we adjusted the original type, we need to truncate the results. 
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + if (LoVT != Lo.getValueType()) + Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Lo); + if (HiVT != Hi.getValueType()) + Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi); } void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, @@ -1130,8 +1139,7 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, LD->getPointerInfo(), LoMemVT, Alignment, MMOFlags, AAInfo); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, LD->getPointerInfo().getWithOffset(IncrementSize), HiMemVT, Alignment, MMOFlags, AAInfo); @@ -1283,10 +1291,19 @@ void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc DL(N); std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); - // Split the input. + // If the input also splits, handle it directly. Otherwise split it by hand. SDValue LL, LH, RL, RH; - std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); - std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); + if (getTypeAction(N->getOperand(0).getValueType()) == + TargetLowering::TypeSplitVector) + GetSplitVector(N->getOperand(0), LL, LH); + else + std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); + + if (getTypeAction(N->getOperand(1).getValueType()) == + TargetLowering::TypeSplitVector) + GetSplitVector(N->getOperand(1), RL, RH); + else + std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); @@ -1753,30 +1770,25 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { // Make the vector elements byte-addressable if they aren't already. 
SDLoc dl(N); EVT EltVT = VecVT.getVectorElementType(); - if (EltVT.getSizeInBits() < 8) { - SmallVector<SDValue, 4> ElementOps; - for (unsigned i = 0; i < VecVT.getVectorNumElements(); ++i) { - ElementOps.push_back(DAG.getAnyExtOrTrunc( - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vec, - DAG.getConstant(i, dl, MVT::i8)), - dl, MVT::i8)); - } - + if (VecVT.getScalarSizeInBits() < 8) { EltVT = MVT::i8; VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, VecVT.getVectorNumElements()); - Vec = DAG.getBuildVector(VecVT, dl, ElementOps); + Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec); } // Store the vector to the stack. SDValue StackPtr = DAG.CreateStackTemporary(VecVT); - SDValue Store = - DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo()); + auto &MF = DAG.getMachineFunction(); + auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo); // Load back the required element. 
StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); - return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, - MachinePointerInfo(), EltVT); + return DAG.getExtLoad( + ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, + MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT); } SDValue DAGTypeLegalizer::SplitVecOp_ExtVecInRegOp(SDNode *N) { @@ -1886,9 +1898,6 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, else std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); - MaskLo = PromoteTargetBoolean(MaskLo, DataLo.getValueType()); - MaskHi = PromoteTargetBoolean(MaskHi, DataHi.getValueType()); - // if Alignment is equal to the vector size, // take the half of it for the second part unsigned SecondHalfAlignment = @@ -1955,7 +1964,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, else std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); - SDValue Lo, Hi; + SDValue Lo; MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(N->getPointerInfo(), MachineMemOperand::MOStore, LoMemVT.getStoreSize(), @@ -1970,13 +1979,12 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, MachineMemOperand::MOStore, HiMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); - SDValue OpsHi[] = {Ch, DataHi, MaskHi, Ptr, IndexHi}; - Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), - DL, OpsHi, MMO); - - // Build a factor node to remember that this store is independent of the - // other one. - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); + // The order of the Scatter operation after split is well defined. The "Hi" + // part comes after the "Lo". So these two operations should be chained one + // after another. 
+ SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi}; + return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), + DL, OpsHi, MMO); } SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -2007,8 +2015,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { AAInfo); // Increment the pointer to the other half. - Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, DL, Ptr.getValueType())); + Ptr = DAG.getObjectPtrOffset(DL, Ptr, IncrementSize); if (isTruncating) Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, @@ -2919,30 +2926,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { ISD::LoadExtType ExtType = N->getExtensionType(); SDLoc dl(N); - if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector) - Mask = GetWidenedVector(Mask); - else { - EVT BoolVT = getSetCCResultType(WidenVT); - - // We can't use ModifyToType() because we should fill the mask with - // zeroes - unsigned WidenNumElts = BoolVT.getVectorNumElements(); - unsigned MaskNumElts = MaskVT.getVectorNumElements(); - - unsigned NumConcat = WidenNumElts / MaskNumElts; - SmallVector<SDValue, 16> Ops(NumConcat); - SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT); - Ops[0] = Mask; - for (unsigned i = 1; i != NumConcat; ++i) - Ops[i] = ZeroVal; - - Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops); - } + // The mask should be widened as well + EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), + MaskVT.getVectorElementType(), + WidenVT.getVectorNumElements()); + Mask = ModifyToType(Mask, WideMaskVT, true); SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(), Mask, Src0, N->getMemoryVT(), N->getMemOperand(), ExtType, - N->isExpandingLoad()); + N->isExpandingLoad()); // Legalize the chain result - switch anything that used the old chain to // use the new one. 
ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -2953,12 +2946,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) { EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Mask = N->getMask(); + EVT MaskVT = Mask.getValueType(); SDValue Src0 = GetWidenedVector(N->getValue()); unsigned NumElts = WideVT.getVectorNumElements(); SDLoc dl(N); // The mask should be widened as well - Mask = WidenTargetBoolean(Mask, WideVT, true); + EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), + MaskVT.getVectorElementType(), + WideVT.getVectorNumElements()); + Mask = ModifyToType(Mask, WideMaskVT, true); // Widen the Index operand SDValue Index = N->getIndex(); @@ -3032,7 +3029,7 @@ SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT, // Make a new Mask node, with a legal result VT. SmallVector<SDValue, 4> Ops; - for (unsigned i = 0; i < InMask->getNumOperands(); ++i) + for (unsigned i = 0, e = InMask->getNumOperands(); i < e; ++i) Ops.push_back(InMask->getOperand(i)); SDValue Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), MaskVT, Ops); @@ -3065,12 +3062,9 @@ SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT, } else if (CurrMaskNumEls < ToMaskVT.getVectorNumElements()) { unsigned NumSubVecs = (ToMaskVT.getVectorNumElements() / CurrMaskNumEls); EVT SubVT = Mask->getValueType(0); - SmallVector<SDValue, 16> SubConcatOps(NumSubVecs); - SubConcatOps[0] = Mask; - for (unsigned i = 1; i < NumSubVecs; ++i) - SubConcatOps[i] = DAG.getUNDEF(SubVT); - Mask = - DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Mask), ToMaskVT, SubConcatOps); + SmallVector<SDValue, 16> SubOps(NumSubVecs, DAG.getUNDEF(SubVT)); + SubOps[0] = Mask; + Mask = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Mask), ToMaskVT, SubOps); } assert((Mask->getValueType(0) == ToMaskVT) && @@ -3105,7 +3099,8 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) { // If this is a splitted VSELECT that was previously already handled, do // nothing. 
- if (Cond->getValueType(0).getScalarSizeInBits() != 1) + EVT CondVT = Cond->getValueType(0); + if (CondVT.getScalarSizeInBits() != 1) return SDValue(); EVT VSelVT = N->getValueType(0); @@ -3129,6 +3124,14 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) { EVT SetCCResVT = getSetCCResultType(SetCCOpVT); if (SetCCResVT.getScalarSizeInBits() == 1) return SDValue(); + } else if (CondVT.getScalarType() == MVT::i1) { + // If there is support for an i1 vector mask (or only scalar i1 conditions), + // don't touch. + while (TLI.getTypeAction(Ctx, CondVT) != TargetLowering::TypeLegal) + CondVT = TLI.getTypeToTransformTo(Ctx, CondVT); + + if (CondVT.getScalarType() == MVT::i1) + return SDValue(); } // Get the VT and operands for VSELECT, and widen if needed. @@ -3236,19 +3239,6 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) { N->getOperand(1), InOp1, InOp2, N->getOperand(4)); } -SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) { - assert(N->getValueType(0).isVector() == - N->getOperand(0).getValueType().isVector() && - "Scalar/Vector type mismatch"); - if (N->getValueType(0).isVector()) return WidenVecRes_VSETCC(N); - - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue InOp1 = GetWidenedVector(N->getOperand(0)); - SDValue InOp2 = GetWidenedVector(N->getOperand(1)); - return DAG.getNode(ISD::SETCC, SDLoc(N), WidenVT, - InOp1, InOp2, N->getOperand(2)); -} - SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); return DAG.getUNDEF(WidenVT); @@ -3279,7 +3269,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) { return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, NewMask); } -SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { +SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) { assert(N->getValueType(0).isVector() && N->getOperand(0).getValueType().isVector() && "Operands must be 
vectors"); @@ -3556,6 +3546,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { + assert(OpNo == 3 && "Can widen only data operand of mstore"); MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N); SDValue Mask = MST->getMask(); EVT MaskVT = Mask.getValueType(); @@ -3564,25 +3555,13 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { SDValue WideVal = GetWidenedVector(StVal); SDLoc dl(N); - if (OpNo == 2 || getTypeAction(MaskVT) == TargetLowering::TypeWidenVector) - Mask = GetWidenedVector(Mask); - else { - // The mask should be widened as well. - EVT BoolVT = getSetCCResultType(WideVal.getValueType()); - // We can't use ModifyToType() because we should fill the mask with - // zeroes. - unsigned WidenNumElts = BoolVT.getVectorNumElements(); - unsigned MaskNumElts = MaskVT.getVectorNumElements(); - - unsigned NumConcat = WidenNumElts / MaskNumElts; - SmallVector<SDValue, 16> Ops(NumConcat); - SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT); - Ops[0] = Mask; - for (unsigned i = 1; i != NumConcat; ++i) - Ops[i] = ZeroVal; + // The mask should be widened as well. + EVT WideVT = WideVal.getValueType(); + EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), + MaskVT.getVectorElementType(), + WideVT.getVectorNumElements()); + Mask = ModifyToType(Mask, WideMaskVT, true); - Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops); - } assert(Mask.getValueType().getVectorNumElements() == WideVal.getValueType().getVectorNumElements() && "Mask and data vectors should have the same number of elements"); @@ -3596,15 +3575,18 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) { MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N); SDValue DataOp = MSC->getValue(); SDValue Mask = MSC->getMask(); + EVT MaskVT = Mask.getValueType(); // Widen the value. 
SDValue WideVal = GetWidenedVector(DataOp); EVT WideVT = WideVal.getValueType(); - unsigned NumElts = WideVal.getValueType().getVectorNumElements(); + unsigned NumElts = WideVT.getVectorNumElements(); SDLoc dl(N); // The mask should be widened as well. - Mask = WidenTargetBoolean(Mask, WideVT, true); + EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), + MaskVT.getVectorElementType(), NumElts); + Mask = ModifyToType(Mask, WideMaskVT, true); // Widen index. SDValue Index = MSC->getIndex(); @@ -3806,8 +3788,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, while (LdWidth > 0) { unsigned Increment = NewVTWidth / 8; Offset += Increment; - BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getConstant(Increment, dl, BasePtr.getValueType())); + BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment); SDValue L; if (LdWidth < NewVTWidth) { @@ -3839,7 +3820,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, } LdOps.push_back(L); - + LdOp = L; LdWidth -= NewVTWidth; } @@ -3929,10 +3910,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, LdChain.push_back(Ops[0].getValue(1)); unsigned i = 0, Offset = Increment; for (i=1; i < NumElts; ++i, Offset += Increment) { - SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), - BasePtr, - DAG.getConstant(Offset, dl, - BasePtr.getValueType())); + SDValue NewBasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Offset); Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, LD->getPointerInfo().getWithOffset(Offset), LdEltVT, Align, MMOFlags, AAInfo); @@ -3987,9 +3965,8 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StWidth -= NewVTWidth; Offset += Increment; Idx += NumVTElts; - BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getConstant(Increment, dl, - BasePtr.getValueType())); + + BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, 
Increment); } while (StWidth != 0 && StWidth >= NewVTWidth); } else { // Cast the vector to the scalar type we can store. @@ -4008,9 +3985,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, MinAlign(Align, Offset), MMOFlags, AAInfo)); StWidth -= NewVTWidth; Offset += Increment; - BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getConstant(Increment, dl, - BasePtr.getValueType())); + BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment); } while (StWidth != 0 && StWidth >= NewVTWidth); // Restore index back to be relative to the original widen element type. Idx = Idx * NewVTWidth / ValEltWidth; @@ -4053,10 +4028,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, MMOFlags, AAInfo)); unsigned Offset = Increment; for (unsigned i=1; i < NumElts; ++i, Offset += Increment) { - SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), - BasePtr, - DAG.getConstant(Offset, dl, - BasePtr.getValueType())); + SDValue NewBasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Offset); SDValue EOp = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index a21b4c733254..379f0dcef513 100644 --- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -22,12 +22,12 @@ #include "llvm/CodeGen/ResourcePriorityQueue.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; diff 
--git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h index 237d541b4cb9..cf92907a8b5f 100644 --- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h +++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -20,32 +20,31 @@ namespace llvm { -class MDNode; +class DIVariable; +class DIExpression; class SDNode; class Value; -/// SDDbgValue - Holds the information from a dbg_value node through SDISel. +/// Holds the information from a dbg_value node through SDISel. /// We do not use SDValue here to avoid including its header. - class SDDbgValue { public: enum DbgValueKind { - SDNODE = 0, // value is the result of an expression - CONST = 1, // value is a constant - FRAMEIX = 2 // value is contents of a stack location + SDNODE = 0, ///< Value is the result of an expression. + CONST = 1, ///< Value is a constant. + FRAMEIX = 2 ///< Value is contents of a stack location. }; private: union { struct { - SDNode *Node; // valid for expressions - unsigned ResNo; // valid for expressions + SDNode *Node; ///< Valid for expressions. + unsigned ResNo; ///< Valid for expressions. } s; - const Value *Const; // valid for constants - unsigned FrameIx; // valid for stack objects + const Value *Const; ///< Valid for constants. + unsigned FrameIx; ///< Valid for stack objects. } u; - MDNode *Var; - MDNode *Expr; - uint64_t Offset; + DIVariable *Var; + DIExpression *Expr; DebugLoc DL; unsigned Order; enum DbgValueKind kind; @@ -53,71 +52,65 @@ private: bool Invalid = false; public: - // Constructor for non-constants. - SDDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, unsigned R, bool indir, - uint64_t off, DebugLoc dl, unsigned O) - : Var(Var), Expr(Expr), Offset(off), DL(std::move(dl)), Order(O), - IsIndirect(indir) { + /// Constructor for non-constants. 
+ SDDbgValue(DIVariable *Var, DIExpression *Expr, SDNode *N, unsigned R, + bool indir, DebugLoc dl, unsigned O) + : Var(Var), Expr(Expr), DL(std::move(dl)), Order(O), IsIndirect(indir) { kind = SDNODE; u.s.Node = N; u.s.ResNo = R; } - // Constructor for constants. - SDDbgValue(MDNode *Var, MDNode *Expr, const Value *C, uint64_t off, - DebugLoc dl, unsigned O) - : Var(Var), Expr(Expr), Offset(off), DL(std::move(dl)), Order(O), - IsIndirect(false) { + /// Constructor for constants. + SDDbgValue(DIVariable *Var, DIExpression *Expr, const Value *C, DebugLoc dl, + unsigned O) + : Var(Var), Expr(Expr), DL(std::move(dl)), Order(O), IsIndirect(false) { kind = CONST; u.Const = C; } - // Constructor for frame indices. - SDDbgValue(MDNode *Var, MDNode *Expr, unsigned FI, uint64_t off, DebugLoc dl, + /// Constructor for frame indices. + SDDbgValue(DIVariable *Var, DIExpression *Expr, unsigned FI, DebugLoc dl, unsigned O) - : Var(Var), Expr(Expr), Offset(off), DL(std::move(dl)), Order(O), - IsIndirect(false) { + : Var(Var), Expr(Expr), DL(std::move(dl)), Order(O), IsIndirect(false) { kind = FRAMEIX; u.FrameIx = FI; } - // Returns the kind. + /// Returns the kind. DbgValueKind getKind() const { return kind; } - // Returns the MDNode pointer for the variable. - MDNode *getVariable() const { return Var; } + /// Returns the DIVariable pointer for the variable. + DIVariable *getVariable() const { return Var; } - // Returns the MDNode pointer for the expression. - MDNode *getExpression() const { return Expr; } + /// Returns the DIExpression pointer for the expression. 
+ DIExpression *getExpression() const { return Expr; } - // Returns the SDNode* for a register ref + /// Returns the SDNode* for a register ref SDNode *getSDNode() const { assert (kind==SDNODE); return u.s.Node; } - // Returns the ResNo for a register ref + /// Returns the ResNo for a register ref unsigned getResNo() const { assert (kind==SDNODE); return u.s.ResNo; } - // Returns the Value* for a constant + /// Returns the Value* for a constant const Value *getConst() const { assert (kind==CONST); return u.Const; } - // Returns the FrameIx for a stack object + /// Returns the FrameIx for a stack object unsigned getFrameIx() const { assert (kind==FRAMEIX); return u.FrameIx; } - // Returns whether this is an indirect value. + /// Returns whether this is an indirect value. bool isIndirect() const { return IsIndirect; } - // Returns the offset. - uint64_t getOffset() const { return Offset; } - - // Returns the DebugLoc. + /// Returns the DebugLoc. DebugLoc getDebugLoc() const { return DL; } - // Returns the SDNodeOrder. This is the order of the preceding node in the - // input. + /// Returns the SDNodeOrder. This is the order of the preceding node in the + /// input. unsigned getOrder() const { return Order; } - // setIsInvalidated / isInvalidated - Setter / getter of the "Invalidated" - // property. A SDDbgValue is invalid if the SDNode that produces the value is - // deleted. + /// setIsInvalidated / isInvalidated - Setter / getter of the "Invalidated" + /// property. A SDDbgValue is invalid if the SDNode that produces the value is + /// deleted. 
void setIsInvalidated() { Invalid = true; } bool isInvalidated() const { return Invalid; } }; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 137994093277..698e14453d1d 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -18,13 +18,13 @@ #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; #define DEBUG_TYPE "pre-RA-sched" diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 70b1fa77a099..49f304c8cc86 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -1,4 +1,4 @@ -//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===// +//===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===// // // The LLVM Compiler Infrastructure // @@ -16,23 +16,47 @@ //===----------------------------------------------------------------------===// #include "ScheduleDAGSDNodes.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" 
#include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/IR/DataLayout.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/InlineAsm.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" -#include <climits> +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <cstdlib> +#include <iterator> +#include <limits> +#include <memory> +#include <utility> +#include <vector> + using namespace llvm; #define DEBUG_TYPE "pre-RA-sched" @@ -46,6 +70,7 @@ static RegisterScheduler burrListDAGScheduler("list-burr", "Bottom-up register reduction list scheduling", createBURRListDAGScheduler); + static RegisterScheduler sourceListDAGScheduler("source", "Similar to list-burr but schedules in source " @@ -105,6 +130,7 @@ static cl::opt<unsigned> AvgIPC( cl::desc("Average inst/cycle whan no target itinerary exists.")); namespace { + //===----------------------------------------------------------------------===// /// ScheduleDAGRRList - The actual register reduction list scheduler /// implementation. This supports both top-down and bottom-up scheduling. @@ -112,7 +138,6 @@ namespace { class ScheduleDAGRRList : public ScheduleDAGSDNodes { private: /// NeedLatency - True if the scheduler will make use of latency information. 
- /// bool NeedLatency; /// AvailableQueue - The priority queue to use for the available SUnits. @@ -122,13 +147,13 @@ private: /// been issued, but their results are not ready yet (due to the latency of /// the operation). Once the operands becomes available, the instruction is /// added to the AvailableQueue. - std::vector<SUnit*> PendingQueue; + std::vector<SUnit *> PendingQueue; /// HazardRec - The hazard recognizer to use. ScheduleHazardRecognizer *HazardRec; /// CurCycle - The current scheduler state corresponds to this cycle. - unsigned CurCycle; + unsigned CurCycle = 0; /// MinAvailableCycle - Cycle of the soonest available instruction. unsigned MinAvailableCycle; @@ -147,7 +172,9 @@ private: // Collect interferences between physical register use/defs. // Each interference is an SUnit and set of physical registers. SmallVector<SUnit*, 4> Interferences; - typedef DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMapT; + + using LRegsMapT = DenseMap<SUnit *, SmallVector<unsigned, 4>>; + LRegsMapT LRegsMap; /// Topo - A topological ordering for SUnits which permits fast IsReachable @@ -163,9 +190,8 @@ public: SchedulingPriorityQueue *availqueue, CodeGenOpt::Level OptLevel) : ScheduleDAGSDNodes(mf), - NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0), + NeedLatency(needlatency), AvailableQueue(availqueue), Topo(SUnits, nullptr) { - const TargetSubtargetInfo &STI = mf.getSubtarget(); if (DisableSchedCycles || !NeedLatency) HazardRec = new ScheduleHazardRecognizer(); @@ -267,6 +293,7 @@ private: return !NeedLatency; } }; + } // end anonymous namespace /// GetCostForDef - Looks up the register class and cost for a given definition. @@ -319,13 +346,13 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, /// Schedule - Schedule the DAG using list scheduling. 
void ScheduleDAGRRList::Schedule() { - DEBUG(dbgs() - << "********** List Scheduling BB#" << BB->getNumber() - << " '" << BB->getName() << "' **********\n"); + DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB) + << " '" << BB->getName() << "' **********\n"); CurCycle = 0; IssueCount = 0; - MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX; + MinAvailableCycle = + DisableSchedCycles ? 0 : std::numeric_limits<unsigned>::max(); NumLiveRegs = 0; // Allocate slots for each physical register, plus one for a special register // to track the virtual resource of a calling sequence. @@ -409,7 +436,7 @@ static bool IsChainDependent(SDNode *Outer, SDNode *Inner, unsigned NestLevel, const TargetInstrInfo *TII) { SDNode *N = Outer; - for (;;) { + while (true) { if (N == Inner) return true; // For a TokenFactor, examine each operand. There may be multiple ways @@ -456,7 +483,7 @@ static bool IsChainDependent(SDNode *Outer, SDNode *Inner, static SDNode * FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest, const TargetInstrInfo *TII) { - for (;;) { + while (true) { // For a TokenFactor, examine each operand. There may be multiple ways // to get to the CALLSEQ_BEGIN, but we need to find the path with the // most nesting in order to ensure that we find the corresponding match. @@ -550,6 +577,7 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) { unsigned NestLevel = 0; unsigned MaxNest = 0; SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII); + assert(N && "Must find call sequence start"); SUnit *Def = &SUnits[N->getNodeId()]; CallSeqEndForStart[Def] = SU; @@ -571,7 +599,7 @@ void ScheduleDAGRRList::ReleasePending() { // If the available queue is empty, it is safe to reset MinAvailableCycle. if (AvailableQueue->empty()) - MinAvailableCycle = UINT_MAX; + MinAvailableCycle = std::numeric_limits<unsigned>::max(); // Check to see if any of the pending instructions are ready to issue. If // so, add them to the available queue. 
@@ -791,7 +819,8 @@ void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { AvailableQueue->remove(PredSU); } - assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!"); + assert(PredSU->NumSuccsLeft < std::numeric_limits<unsigned>::max() && + "NumSuccsLeft will overflow!"); ++PredSU->NumSuccsLeft; } @@ -821,9 +850,13 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { SUNode = SUNode->getGluedNode()) { if (SUNode->isMachineOpcode() && SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) { + SUnit *SeqEnd = CallSeqEndForStart[SU]; + assert(SeqEnd && "Call sequence start/end must be known"); + assert(!LiveRegDefs[CallResource]); + assert(!LiveRegGens[CallResource]); ++NumLiveRegs; LiveRegDefs[CallResource] = SU; - LiveRegGens[CallResource] = CallSeqEndForStart[SU]; + LiveRegGens[CallResource] = SeqEnd; } } @@ -835,6 +868,8 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { if (SUNode->isMachineOpcode() && SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) { assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); + assert(LiveRegDefs[CallResource]); + assert(LiveRegGens[CallResource]); --NumLiveRegs; LiveRegDefs[CallResource] = nullptr; LiveRegGens[CallResource] = nullptr; @@ -891,7 +926,7 @@ void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() { if (LookAhead == 0) return; - std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead); + std::vector<SUnit *>::const_iterator I = (Sequence.end() - LookAhead); unsigned HazardCycle = (*I)->getHeight(); for (auto E = Sequence.end(); I != E; ++I) { SUnit *SU = *I; @@ -1319,8 +1354,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { // If we're in the middle of scheduling a call, don't begin scheduling // another call. Also, don't allow any physical registers to be live across // the call. 
- if ((Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) || - (Node->getMachineOpcode() == TII->getCallFrameSetupOpcode())) { + if (Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) { // Check the special calling-sequence resource. unsigned CallResource = TRI->getNumRegs(); if (LiveRegDefs[CallResource]) { @@ -1390,27 +1424,32 @@ void ScheduleDAGRRList::releaseInterferences(unsigned Reg) { /// (3) No Interferences: may unschedule to break register interferences. SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop(); - while (CurSU) { - SmallVector<unsigned, 4> LRegs; - if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) - break; - DEBUG(dbgs() << " Interfering reg " << - (LRegs[0] == TRI->getNumRegs() ? "CallResource" - : TRI->getName(LRegs[0])) - << " SU #" << CurSU->NodeNum << '\n'); - std::pair<LRegsMapT::iterator, bool> LRegsPair = - LRegsMap.insert(std::make_pair(CurSU, LRegs)); - if (LRegsPair.second) { - CurSU->isPending = true; // This SU is not in AvailableQueue right now. - Interferences.push_back(CurSU); - } - else { - assert(CurSU->isPending && "Interferences are pending"); - // Update the interference with current live regs. - LRegsPair.first->second = LRegs; + auto FindAvailableNode = [&]() { + while (CurSU) { + SmallVector<unsigned, 4> LRegs; + if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) + break; + DEBUG(dbgs() << " Interfering reg "; + if (LRegs[0] == TRI->getNumRegs()) + dbgs() << "CallResource"; + else + dbgs() << printReg(LRegs[0], TRI); + dbgs() << " SU #" << CurSU->NodeNum << '\n'); + std::pair<LRegsMapT::iterator, bool> LRegsPair = + LRegsMap.insert(std::make_pair(CurSU, LRegs)); + if (LRegsPair.second) { + CurSU->isPending = true; // This SU is not in AvailableQueue right now. + Interferences.push_back(CurSU); + } + else { + assert(CurSU->isPending && "Interferences are pending"); + // Update the interference with current live regs. 
+ LRegsPair.first->second = LRegs; + } + CurSU = AvailableQueue->pop(); } - CurSU = AvailableQueue->pop(); - } + }; + FindAvailableNode(); if (CurSU) return CurSU; @@ -1423,7 +1462,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { // Try unscheduling up to the point where it's safe to schedule // this node. SUnit *BtSU = nullptr; - unsigned LiveCycle = UINT_MAX; + unsigned LiveCycle = std::numeric_limits<unsigned>::max(); for (unsigned Reg : LRegs) { if (LiveRegGens[Reg]->getHeight() < LiveCycle) { BtSU = LiveRegGens[Reg]; @@ -1447,13 +1486,16 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { // If one or more successors has been unscheduled, then the current // node is no longer available. - if (!TrySU->isAvailable || !TrySU->NodeQueueId) + if (!TrySU->isAvailable || !TrySU->NodeQueueId) { + DEBUG(dbgs() << "TrySU not available; choosing node from queue\n"); CurSU = AvailableQueue->pop(); - else { + } else { + DEBUG(dbgs() << "TrySU available\n"); // Available and in AvailableQueue AvailableQueue->remove(TrySU); CurSU = TrySU; } + FindAvailableNode(); // Interferences has been mutated. We must break. break; } @@ -1540,7 +1582,8 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { while (AvailableQueue->empty() && !PendingQueue.empty()) { // Advance the cycle to free resources. Skip ahead to the next ready SU. 
- assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized"); + assert(MinAvailableCycle < std::numeric_limits<unsigned>::max() && + "MinAvailableCycle uninitialized"); AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle)); } } @@ -1553,17 +1596,11 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { #endif } -//===----------------------------------------------------------------------===// -// RegReductionPriorityQueue Definition -//===----------------------------------------------------------------------===// -// -// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers -// to reduce register pressure. -// namespace { + class RegReductionPQBase; -struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> { +struct queue_sort { bool isReady(SUnit* SU, unsigned CurCycle) const { return true; } }; @@ -1571,6 +1608,7 @@ struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> { template<class SF> struct reverse_sort : public queue_sort { SF &SortFunc; + reverse_sort(SF &sf) : SortFunc(sf) {} bool operator()(SUnit* left, SUnit* right) const { @@ -1590,6 +1628,7 @@ struct bu_ls_rr_sort : public queue_sort { }; RegReductionPQBase *SPQ; + bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} bool operator()(SUnit* left, SUnit* right) const; @@ -1603,8 +1642,8 @@ struct src_ls_rr_sort : public queue_sort { }; RegReductionPQBase *SPQ; - src_ls_rr_sort(RegReductionPQBase *spq) - : SPQ(spq) {} + + src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} bool operator()(SUnit* left, SUnit* right) const; }; @@ -1617,8 +1656,8 @@ struct hybrid_ls_rr_sort : public queue_sort { }; RegReductionPQBase *SPQ; - hybrid_ls_rr_sort(RegReductionPQBase *spq) - : SPQ(spq) {} + + hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} bool isReady(SUnit *SU, unsigned CurCycle) const; @@ -1634,8 +1673,8 @@ struct ilp_ls_rr_sort : public queue_sort { }; RegReductionPQBase *SPQ; - ilp_ls_rr_sort(RegReductionPQBase *spq) - : SPQ(spq) 
{} + + ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} bool isReady(SUnit *SU, unsigned CurCycle) const; @@ -1644,8 +1683,8 @@ struct ilp_ls_rr_sort : public queue_sort { class RegReductionPQBase : public SchedulingPriorityQueue { protected: - std::vector<SUnit*> Queue; - unsigned CurQueueId; + std::vector<SUnit *> Queue; + unsigned CurQueueId = 0; bool TracksRegPressure; bool SrcOrder; @@ -1656,13 +1695,12 @@ protected: const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; const TargetLowering *TLI; - ScheduleDAGRRList *scheduleDAG; + ScheduleDAGRRList *scheduleDAG = nullptr; // SethiUllmanNumbers - The SethiUllman number for each node. std::vector<unsigned> SethiUllmanNumbers; /// RegPressure - Tracking current reg pressure per register class. - /// std::vector<unsigned> RegPressure; /// RegLimit - Tracking the number of allocatable registers per register @@ -1677,9 +1715,8 @@ public: const TargetInstrInfo *tii, const TargetRegisterInfo *tri, const TargetLowering *tli) - : SchedulingPriorityQueue(hasReadyFilter), - CurQueueId(0), TracksRegPressure(tracksrp), SrcOrder(srcorder), - MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(nullptr) { + : SchedulingPriorityQueue(hasReadyFilter), TracksRegPressure(tracksrp), + SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli) { if (TracksRegPressure) { unsigned NumRC = TRI->getNumRegClasses(); RegLimit.resize(NumRC); @@ -1730,7 +1767,7 @@ public: void remove(SUnit *SU) override { assert(!Queue.empty() && "Queue is empty!"); assert(SU->NodeQueueId != 0 && "Not in queue!"); - std::vector<SUnit *>::iterator I = find(Queue, SU); + std::vector<SUnit *>::iterator I = llvm::find(Queue, SU); if (I != std::prev(Queue.end())) std::swap(*I, Queue.back()); Queue.pop_back(); @@ -1759,7 +1796,7 @@ protected: }; template<class SF> -static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) { +static SUnit *popFromQueueImpl(std::vector<SUnit *> &Q, SF &Picker) { std::vector<SUnit *>::iterator Best = Q.begin(); for 
(auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I) if (Picker(*Best, *I)) @@ -1772,7 +1809,7 @@ static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) { } template<class SF> -SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) { +SUnit *popFromQueue(std::vector<SUnit *> &Q, SF &Picker, ScheduleDAG *DAG) { #ifndef NDEBUG if (DAG->StressSched) { reverse_sort<SF> RPicker(Picker); @@ -1783,6 +1820,13 @@ SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) { return popFromQueueImpl(Q, Picker); } +//===----------------------------------------------------------------------===// +// RegReductionPriorityQueue Definition +//===----------------------------------------------------------------------===// +// +// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers +// to reduce register pressure. +// template<class SF> class RegReductionPriorityQueue : public RegReductionPQBase { SF Picker; @@ -1815,7 +1859,7 @@ public: #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override { // Emulate pop() without clobbering NodeQueueIds. 
- std::vector<SUnit*> DumpQueue = Queue; + std::vector<SUnit *> DumpQueue = Queue; SF DumpPicker = Picker; while (!DumpQueue.empty()) { SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG); @@ -1826,17 +1870,11 @@ public: #endif }; -typedef RegReductionPriorityQueue<bu_ls_rr_sort> -BURegReductionPriorityQueue; - -typedef RegReductionPriorityQueue<src_ls_rr_sort> -SrcRegReductionPriorityQueue; +using BURegReductionPriorityQueue = RegReductionPriorityQueue<bu_ls_rr_sort>; +using SrcRegReductionPriorityQueue = RegReductionPriorityQueue<src_ls_rr_sort>; +using HybridBURRPriorityQueue = RegReductionPriorityQueue<hybrid_ls_rr_sort>; +using ILPBURRPriorityQueue = RegReductionPriorityQueue<ilp_ls_rr_sort>; -typedef RegReductionPriorityQueue<hybrid_ls_rr_sort> -HybridBURRPriorityQueue; - -typedef RegReductionPriorityQueue<ilp_ls_rr_sort> -ILPBURRPriorityQueue; } // end anonymous namespace //===----------------------------------------------------------------------===// @@ -2855,7 +2893,6 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, /// This results in the store being scheduled immediately /// after N, which shortens the U->N live range, reducing /// register pressure. -/// void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { // Visit all the nodes in topological order, working top-down. 
for (SUnit &SU : *SUnits) { @@ -3022,7 +3059,7 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() { // Public Constructor Functions //===----------------------------------------------------------------------===// -llvm::ScheduleDAGSDNodes * +ScheduleDAGSDNodes * llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel) { const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); @@ -3036,7 +3073,7 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, return SD; } -llvm::ScheduleDAGSDNodes * +ScheduleDAGSDNodes * llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel) { const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); @@ -3050,7 +3087,7 @@ llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, return SD; } -llvm::ScheduleDAGSDNodes * +ScheduleDAGSDNodes * llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel) { const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); @@ -3066,7 +3103,7 @@ llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, return SD; } -llvm::ScheduleDAGSDNodes * +ScheduleDAGSDNodes * llvm::createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel) { const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 3c8526ebb702..c09b47af26a6 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -23,14 +23,14 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" 
-#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "pre-RA-sched" @@ -709,18 +709,17 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, // source order number as N. MachineBasicBlock *BB = Emitter.getBlock(); MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos(); - ArrayRef<SDDbgValue*> DVs = DAG->GetDbgValues(N); - for (unsigned i = 0, e = DVs.size(); i != e; ++i) { - if (DVs[i]->isInvalidated()) + for (auto DV : DAG->GetDbgValues(N)) { + if (DV->isInvalidated()) continue; - unsigned DVOrder = DVs[i]->getOrder(); + unsigned DVOrder = DV->getOrder(); if (!Order || DVOrder == Order) { - MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap); + MachineInstr *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap); if (DbgMI) { - Orders.push_back(std::make_pair(DVOrder, DbgMI)); + Orders.push_back({DVOrder, DbgMI}); BB->insert(InsertPos, DbgMI); } - DVs[i]->setIsInvalidated(); + DV->setIsInvalidated(); } } } @@ -742,16 +741,17 @@ ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, } MachineBasicBlock *BB = Emitter.getBlock(); - if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI() || + auto IP = Emitter.getInsertPos(); + if (IP == BB->begin() || BB->back().isPHI() || // Fast-isel may have inserted some instructions, in which case the // BB->back().isPHI() test will not fire when we want it to. - std::prev(Emitter.getInsertPos())->isPHI()) { + std::prev(IP)->isPHI()) { // Did not insert any instruction. 
- Orders.push_back(std::make_pair(Order, (MachineInstr*)nullptr)); + Orders.push_back({Order, (MachineInstr *)nullptr}); return; } - Orders.push_back(std::make_pair(Order, &*std::prev(Emitter.getInsertPos()))); + Orders.push_back({Order, &*std::prev(IP)}); ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order); } @@ -856,8 +856,13 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI(); // Sort the source order instructions and use the order to insert debug - // values. - std::sort(Orders.begin(), Orders.end(), less_first()); + // values. Use stable_sort so that DBG_VALUEs are inserted in the same order + // regardless of the host's implementation fo std::sort. + std::stable_sort(Orders.begin(), Orders.end(), less_first()); + std::stable_sort(DAG->DbgBegin(), DAG->DbgEnd(), + [](const SDDbgValue *LHS, const SDDbgValue *RHS) { + return LHS->getOrder() < RHS->getOrder(); + }); SDDbgInfo::DbgIterator DI = DAG->DbgBegin(); SDDbgInfo::DbgIterator DE = DAG->DbgEnd(); @@ -869,10 +874,12 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { // Insert all SDDbgValue's whose order(s) are before "Order". if (!MI) continue; - for (; DI != DE && - (*DI)->getOrder() >= LastOrder && (*DI)->getOrder() < Order; ++DI) { + for (; DI != DE; ++DI) { + if ((*DI)->getOrder() < LastOrder || (*DI)->getOrder() >= Order) + break; if ((*DI)->isInvalidated()) continue; + MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap); if (DbgMI) { if (!LastOrder) @@ -891,11 +898,13 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { // Add trailing DbgValue's before the terminator. FIXME: May want to add // some of them before one or more conditional branches? 
SmallVector<MachineInstr*, 8> DbgMIs; - while (DI != DE) { - if (!(*DI)->isInvalidated()) - if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap)) - DbgMIs.push_back(DbgMI); - ++DI; + for (; DI != DE; ++DI) { + if ((*DI)->isInvalidated()) + continue; + assert((*DI)->getOrder() >= LastOrder && + "emitting DBG_VALUE out of order"); + if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap)) + DbgMIs.push_back(DbgMI); } MachineBasicBlock *InsertBB = Emitter.getBlock(); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index 631cb34717c4..07b46b9183ab 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -25,13 +25,13 @@ #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <climits> using namespace llvm; @@ -93,9 +93,8 @@ private: /// Schedule - Schedule the DAG using list scheduling. void ScheduleDAGVLIW::Schedule() { - DEBUG(dbgs() - << "********** List Scheduling BB#" << BB->getNumber() - << " '" << BB->getName() << "' **********\n"); + DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB) + << " '" << BB->getName() << "' **********\n"); // Build the scheduling graph. 
BuildSchedGraph(AA); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 16f425dc7969..12a21e74079e 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -37,6 +37,9 @@ #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" @@ -59,11 +62,8 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/Mutex.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -87,6 +87,15 @@ static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) { void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {} void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {} +#define DEBUG_TYPE "selectiondag" + +static void NewSDValueDbgMsg(SDValue V, StringRef Msg, SelectionDAG *G) { + DEBUG( + dbgs() << Msg; + V.getNode()->dump(G); + ); +} + //===----------------------------------------------------------------------===// // ConstantFPSDNode Class //===----------------------------------------------------------------------===// @@ -116,8 +125,7 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT, // ISD Namespace //===----------------------------------------------------------------------===// -bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal, - bool AllowShrink) { +bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) { auto *BV = dyn_cast<BuildVectorSDNode>(N); if 
(!BV) return false; @@ -126,10 +134,9 @@ bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal, unsigned SplatBitSize; bool HasUndefs; unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits(); - unsigned MinSplatBits = AllowShrink ? 0 : EltSize; return BV->isConstantSplat(SplatVal, SplatUndef, SplatBitSize, HasUndefs, - MinSplatBits) && - EltSize >= SplatBitSize; + EltSize) && + EltSize == SplatBitSize; } // FIXME: AllOnes and AllZeros duplicate a lot of code. Could these be @@ -895,12 +902,14 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) } void SelectionDAG::init(MachineFunction &NewMF, - OptimizationRemarkEmitter &NewORE) { + OptimizationRemarkEmitter &NewORE, + Pass *PassPtr) { MF = &NewMF; + SDAGISelPass = PassPtr; ORE = &NewORE; TLI = getSubtarget().getTargetLowering(); TSI = getSubtarget().getSelectionDAGInfo(); - Context = &MF->getFunction()->getContext(); + Context = &MF->getFunction().getContext(); } SelectionDAG::~SelectionDAG() { @@ -1018,7 +1027,7 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) { assert(!VT.isVector() && "getZeroExtendInReg should use the vector element type instead of " "the vector type!"); - if (Op.getValueType() == VT) return Op; + if (Op.getValueType().getScalarType() == VT) return Op; unsigned BitWidth = Op.getScalarValueSizeInBits(); APInt Imm = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); @@ -1156,7 +1165,9 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, SmallVector<SDValue, 8> Ops; for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) Ops.insert(Ops.end(), EltParts.begin(), EltParts.end()); - return getNode(ISD::BITCAST, DL, VT, getBuildVector(ViaVecVT, DL, Ops)); + + SDValue V = getNode(ISD::BITCAST, DL, VT, getBuildVector(ViaVecVT, DL, Ops)); + return V; } assert(Elt->getBitWidth() == EltVT.getSizeInBits() && @@ -1176,11 +1187,13 @@ SDValue SelectionDAG::getConstant(const ConstantInt 
&Val, const SDLoc &DL, N = newSDNode<ConstantSDNode>(isT, isO, Elt, DL.getDebugLoc(), EltVT); CSEMap.InsertNode(N, IP); InsertNode(N); + NewSDValueDbgMsg(SDValue(N, 0), "Creating constant: ", this); } SDValue Result(N, 0); if (VT.isVector()) Result = getSplatBuildVector(VT, DL, Result); + return Result; } @@ -1222,6 +1235,7 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL, SDValue Result(N, 0); if (VT.isVector()) Result = getSplatBuildVector(VT, DL, Result); + NewSDValueDbgMsg(Result, "Creating fp constant: ", this); return Result; } @@ -1317,7 +1331,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) - Alignment = MF->getFunction()->optForSize() + Alignment = MF->getFunction().optForSize() ? getDataLayout().getABITypeAlignment(C->getType()) : getDataLayout().getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; @@ -1471,7 +1485,8 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, // Validate that all indices in Mask are within the range of the elements // input to the shuffle. int NElts = Mask.size(); - assert(llvm::all_of(Mask, [&](int M) { return M < (NElts * 2); }) && + assert(llvm::all_of(Mask, + [&](int M) { return M < (NElts * 2) && M >= -1; }) && "Index out of range"); // Copy the mask so we can do any needed cleanup. 
@@ -1622,7 +1637,9 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, CSEMap.InsertNode(N, IP); InsertNode(N); - return SDValue(N, 0); + SDValue V = SDValue(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; } SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) { @@ -1665,15 +1682,20 @@ SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) { SDValue SelectionDAG::getEHLabel(const SDLoc &dl, SDValue Root, MCSymbol *Label) { + return getLabelNode(ISD::EH_LABEL, dl, Root, Label); +} + +SDValue SelectionDAG::getLabelNode(unsigned Opcode, const SDLoc &dl, + SDValue Root, MCSymbol *Label) { FoldingSetNodeID ID; SDValue Ops[] = { Root }; - AddNodeIDNode(ID, ISD::EH_LABEL, getVTList(MVT::Other), Ops); + AddNodeIDNode(ID, Opcode, getVTList(MVT::Other), Ops); ID.AddPointer(Label); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - auto *N = newSDNode<EHLabelSDNode>(dl.getIROrder(), dl.getDebugLoc(), Label); + auto *N = newSDNode<LabelSDNode>(dl.getIROrder(), dl.getDebugLoc(), Label); createOperands(N, Ops); CSEMap.InsertNode(N, IP); @@ -1955,6 +1977,69 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, return SDValue(); } +/// See if the specified operand can be simplified with the knowledge that only +/// the bits specified by Mask are used. +SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &Mask) { + switch (V.getOpcode()) { + default: + break; + case ISD::Constant: { + const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode()); + assert(CV && "Const value should be ConstSDNode."); + const APInt &CVal = CV->getAPIntValue(); + APInt NewVal = CVal & Mask; + if (NewVal != CVal) + return getConstant(NewVal, SDLoc(V), V.getValueType()); + break; + } + case ISD::OR: + case ISD::XOR: + // If the LHS or RHS don't contribute bits to the or, drop them. 
+ if (MaskedValueIsZero(V.getOperand(0), Mask)) + return V.getOperand(1); + if (MaskedValueIsZero(V.getOperand(1), Mask)) + return V.getOperand(0); + break; + case ISD::SRL: + // Only look at single-use SRLs. + if (!V.getNode()->hasOneUse()) + break; + if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) { + // See if we can recursively simplify the LHS. + unsigned Amt = RHSC->getZExtValue(); + + // Watch out for shift count overflow though. + if (Amt >= Mask.getBitWidth()) + break; + APInt NewMask = Mask << Amt; + if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask)) + return getNode(ISD::SRL, SDLoc(V), V.getValueType(), SimplifyLHS, + V.getOperand(1)); + } + break; + case ISD::AND: { + // X & -1 -> X (ignoring bits which aren't demanded). + ConstantSDNode *AndVal = isConstOrConstSplat(V.getOperand(1)); + if (AndVal && Mask.isSubsetOf(AndVal->getAPIntValue())) + return V.getOperand(0); + break; + } + case ISD::ANY_EXTEND: { + SDValue Src = V.getOperand(0); + unsigned SrcBitWidth = Src.getScalarValueSizeInBits(); + // Being conservative here - only peek through if we only demand bits in the + // non-extended source (even though the extended bits are technically undef). + if (Mask.getActiveBits() > SrcBitWidth) + break; + APInt SrcMask = Mask.trunc(SrcBitWidth); + if (SDValue DemandedSrc = GetDemandedBits(Src, SrcMask)) + return getNode(ISD::ANY_EXTEND, SDLoc(V), V.getValueType(), DemandedSrc); + break; + } + } + return SDValue(); +} + /// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We /// use this predicate to simplify operations downstream. bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { @@ -1972,6 +2057,30 @@ bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, return Mask.isSubsetOf(Known.Zero); } +/// Helper function that checks to see if a node is a constant or a +/// build vector of splat constants at least within the demanded elts. 
+static ConstantSDNode *isConstOrDemandedConstSplat(SDValue N, + const APInt &DemandedElts) { + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) + return CN; + if (N.getOpcode() != ISD::BUILD_VECTOR) + return nullptr; + EVT VT = N.getValueType(); + ConstantSDNode *Cst = nullptr; + unsigned NumElts = VT.getVectorNumElements(); + assert(DemandedElts.getBitWidth() == NumElts && "Unexpected vector size"); + for (unsigned i = 0; i != NumElts; ++i) { + if (!DemandedElts[i]) + continue; + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(i)); + if (!C || (Cst && Cst->getAPIntValue() != C->getAPIntValue()) || + C->getValueType(0) != VT.getScalarType()) + return nullptr; + Cst = C; + } + return Cst; +} + /// If a SHL/SRA/SRL node has a constant or splat constant shift amount that /// is less than the element bit-width of the shift node, return it. static const APInt *getValidShiftAmountConstant(SDValue V) { @@ -2005,6 +2114,20 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, unsigned BitWidth = Op.getScalarValueSizeInBits(); Known = KnownBits(BitWidth); // Don't know anything. + + if (auto *C = dyn_cast<ConstantSDNode>(Op)) { + // We know all of the bits for a constant! + Known.One = C->getAPIntValue(); + Known.Zero = ~Known.One; + return; + } + if (auto *C = dyn_cast<ConstantFPSDNode>(Op)) { + // We know all of the bits for a constant fp! + Known.One = C->getValueAPF().bitcastToAPInt(); + Known.Zero = ~Known.One; + return; + } + if (Depth == 6) return; // Limit search depth. @@ -2016,11 +2139,6 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, unsigned Opcode = Op.getOpcode(); switch (Opcode) { - case ISD::Constant: - // We know all of the bits for a constant! - Known.One = cast<ConstantSDNode>(Op)->getAPIntValue(); - Known.Zero = ~Known.One; - break; case ISD::BUILD_VECTOR: // Collect the known bits that are shared by every demanded vector element. 
assert(NumElts == Op.getValueType().getVectorNumElements() && @@ -2045,7 +2163,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, Known.Zero &= Known2.Zero; // If we don't know any bits, early out. - if (!Known.One && !Known.Zero) + if (Known.isUnknown()) break; } break; @@ -2083,7 +2201,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, Known.Zero &= Known2.Zero; } // If we don't know any bits, early out. - if (!Known.One && !Known.Zero) + if (Known.isUnknown()) break; if (!!DemandedRHS) { SDValue RHS = Op.getOperand(1); @@ -2109,11 +2227,45 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, Known.Zero &= Known2.Zero; } // If we don't know any bits, early out. - if (!Known.One && !Known.Zero) + if (Known.isUnknown()) break; } break; } + case ISD::INSERT_SUBVECTOR: { + // If we know the element index, demand any elements from the subvector and + // the remainder from the src its inserted into, otherwise demand them all. + SDValue Src = Op.getOperand(0); + SDValue Sub = Op.getOperand(1); + ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2)); + unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); + if (SubIdx && SubIdx->getAPIntValue().ule(NumElts - NumSubElts)) { + Known.One.setAllBits(); + Known.Zero.setAllBits(); + uint64_t Idx = SubIdx->getZExtValue(); + APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); + if (!!DemandedSubElts) { + computeKnownBits(Sub, Known, DemandedSubElts, Depth + 1); + if (Known.isUnknown()) + break; // early-out. + } + APInt SubMask = APInt::getBitsSet(NumElts, Idx, Idx + NumSubElts); + APInt DemandedSrcElts = DemandedElts & ~SubMask; + if (!!DemandedSrcElts) { + computeKnownBits(Src, Known2, DemandedSrcElts, Depth + 1); + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; + } + } else { + computeKnownBits(Sub, Known, Depth + 1); + if (Known.isUnknown()) + break; // early-out. 
+ computeKnownBits(Src, Known2, Depth + 1); + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; + } + break; + } case ISD::EXTRACT_SUBVECTOR: { // If we know the element index, just demand that subvector elements, // otherwise demand them all. @@ -2132,10 +2284,11 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, } case ISD::BITCAST: { SDValue N0 = Op.getOperand(0); - unsigned SubBitWidth = N0.getScalarValueSizeInBits(); + EVT SubVT = N0.getValueType(); + unsigned SubBitWidth = SubVT.getScalarSizeInBits(); - // Ignore bitcasts from floating point. - if (!N0.getValueType().isInteger()) + // Ignore bitcasts from unsupported types. + if (!(SubVT.isInteger() || SubVT.isFloatingPoint())) break; // Fast handling of 'identity' bitcasts. @@ -2193,7 +2346,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, Known.One &= Known2.One.lshr(Offset).trunc(BitWidth); Known.Zero &= Known2.Zero.lshr(Offset).trunc(BitWidth); // If we don't know any bits, early out. - if (!Known.One && !Known.Zero) + if (Known.isUnknown()) break; } } @@ -2264,22 +2417,23 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, break; } case ISD::SELECT: - computeKnownBits(Op.getOperand(2), Known, Depth+1); + case ISD::VSELECT: + computeKnownBits(Op.getOperand(2), Known, DemandedElts, Depth+1); // If we don't know any bits, early out. - if (!Known.One && !Known.Zero) + if (Known.isUnknown()) break; - computeKnownBits(Op.getOperand(1), Known2, Depth+1); + computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth+1); // Only known if known in both the LHS and RHS. Known.One &= Known2.One; Known.Zero &= Known2.Zero; break; case ISD::SELECT_CC: - computeKnownBits(Op.getOperand(3), Known, Depth+1); + computeKnownBits(Op.getOperand(3), Known, DemandedElts, Depth+1); // If we don't know any bits, early out. 
- if (!Known.One && !Known.Zero) + if (Known.isUnknown()) break; - computeKnownBits(Op.getOperand(2), Known2, Depth+1); + computeKnownBits(Op.getOperand(2), Known2, DemandedElts, Depth+1); // Only known if known in both the LHS and RHS. Known.One &= Known2.One; @@ -2308,35 +2462,49 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, case ISD::SHL: if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); - Known.Zero <<= *ShAmt; - Known.One <<= *ShAmt; + unsigned Shift = ShAmt->getZExtValue(); + Known.Zero <<= Shift; + Known.One <<= Shift; // Low bits are known zero. - Known.Zero.setLowBits(ShAmt->getZExtValue()); + Known.Zero.setLowBits(Shift); } break; case ISD::SRL: if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); - Known.Zero.lshrInPlace(*ShAmt); - Known.One.lshrInPlace(*ShAmt); + unsigned Shift = ShAmt->getZExtValue(); + Known.Zero.lshrInPlace(Shift); + Known.One.lshrInPlace(Shift); // High bits are known zero. - Known.Zero.setHighBits(ShAmt->getZExtValue()); + Known.Zero.setHighBits(Shift); + } else if (auto *BV = dyn_cast<BuildVectorSDNode>(Op.getOperand(1))) { + // If the shift amount is a vector of constants see if we can bound + // the number of upper zero bits. + unsigned ShiftAmountMin = BitWidth; + for (unsigned i = 0; i != BV->getNumOperands(); ++i) { + if (auto *C = dyn_cast<ConstantSDNode>(BV->getOperand(i))) { + const APInt &ShAmt = C->getAPIntValue(); + if (ShAmt.ult(BitWidth)) { + ShiftAmountMin = std::min<unsigned>(ShiftAmountMin, + ShAmt.getZExtValue()); + continue; + } + } + // Don't know anything. 
+ ShiftAmountMin = 0; + break; + } + + Known.Zero.setHighBits(ShiftAmountMin); } break; case ISD::SRA: if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); - Known.Zero.lshrInPlace(*ShAmt); - Known.One.lshrInPlace(*ShAmt); - // If we know the value of the sign bit, then we know it is copied across - // the high bits by the shift amount. - APInt SignMask = APInt::getSignMask(BitWidth); - SignMask.lshrInPlace(*ShAmt); // Adjust to where it is now in the mask. - if (Known.Zero.intersects(SignMask)) { - Known.Zero.setHighBits(ShAmt->getZExtValue());// New bits are known zero. - } else if (Known.One.intersects(SignMask)) { - Known.One.setHighBits(ShAmt->getZExtValue()); // New bits are known one. - } + unsigned Shift = ShAmt->getZExtValue(); + // Sign extend known zero/one bit (else is unknown). + Known.Zero.ashrInPlace(Shift); + Known.One.ashrInPlace(Shift); } break; case ISD::SIGN_EXTEND_INREG: { @@ -2414,49 +2582,33 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, } case ISD::ZERO_EXTEND_VECTOR_INREG: { EVT InVT = Op.getOperand(0).getValueType(); - unsigned InBits = InVT.getScalarSizeInBits(); - Known = Known.trunc(InBits); - computeKnownBits(Op.getOperand(0), Known, - DemandedElts.zext(InVT.getVectorNumElements()), - Depth + 1); + APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements()); + computeKnownBits(Op.getOperand(0), Known, InDemandedElts, Depth + 1); Known = Known.zext(BitWidth); - Known.Zero.setBitsFrom(InBits); + Known.Zero.setBitsFrom(InVT.getScalarSizeInBits()); break; } case ISD::ZERO_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); - unsigned InBits = InVT.getScalarSizeInBits(); - Known = Known.trunc(InBits); computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); Known = Known.zext(BitWidth); - Known.Zero.setBitsFrom(InBits); + Known.Zero.setBitsFrom(InVT.getScalarSizeInBits()); break; } // TODO ISD::SIGN_EXTEND_VECTOR_INREG 
case ISD::SIGN_EXTEND: { - EVT InVT = Op.getOperand(0).getValueType(); - unsigned InBits = InVT.getScalarSizeInBits(); - - Known = Known.trunc(InBits); computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); - // If the sign bit is known to be zero or one, then sext will extend // it to the top bits, else it will just zext. Known = Known.sext(BitWidth); break; } case ISD::ANY_EXTEND: { - EVT InVT = Op.getOperand(0).getValueType(); - unsigned InBits = InVT.getScalarSizeInBits(); - Known = Known.trunc(InBits); computeKnownBits(Op.getOperand(0), Known, Depth+1); Known = Known.zext(BitWidth); break; } case ISD::TRUNCATE: { - EVT InVT = Op.getOperand(0).getValueType(); - unsigned InBits = InVT.getScalarSizeInBits(); - Known = Known.zext(InBits); computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); Known = Known.trunc(BitWidth); break; @@ -2755,7 +2907,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); // If we don't know any bits, early out. - if (!Known.One && !Known.Zero) + if (Known.isUnknown()) break; computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1); Known.Zero &= Known2.Zero; @@ -2764,11 +2916,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, } case ISD::FrameIndex: case ISD::TargetFrameIndex: - if (unsigned Align = InferPtrAlignment(Op)) { - // The low bits are known zero if the pointer is aligned. 
- Known.Zero.setLowBits(Log2_32(Align)); - break; - } + TLI->computeKnownBitsForFrameIndex(Op, Known, DemandedElts, *this, Depth); break; default: @@ -2783,7 +2931,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, break; } - assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); } SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0, @@ -2873,12 +3021,17 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, unsigned Depth) const { EVT VT = Op.getValueType(); - assert(VT.isInteger() && "Invalid VT!"); + assert((VT.isInteger() || VT.isFloatingPoint()) && "Invalid VT!"); unsigned VTBits = VT.getScalarSizeInBits(); unsigned NumElts = DemandedElts.getBitWidth(); unsigned Tmp, Tmp2; unsigned FirstAnswer = 1; + if (auto *C = dyn_cast<ConstantSDNode>(Op)) { + const APInt &Val = C->getAPIntValue(); + return Val.getNumSignBits(); + } + if (Depth == 6) return 1; // Limit search depth. @@ -2894,11 +3047,6 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits(); return VTBits-Tmp; - case ISD::Constant: { - const APInt &Val = cast<ConstantSDNode>(Op)->getAPIntValue(); - return Val.getNumSignBits(); - } - case ISD::BUILD_VECTOR: Tmp = VTBits; for (unsigned i = 0, e = Op.getNumOperands(); (i < e) && (Tmp > 1); ++i) { @@ -2952,32 +3100,63 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return Tmp; } + case ISD::BITCAST: { + SDValue N0 = Op.getOperand(0); + EVT SrcVT = N0.getValueType(); + unsigned SrcBits = SrcVT.getScalarSizeInBits(); + + // Ignore bitcasts from unsupported types.. + if (!(SrcVT.isInteger() || SrcVT.isFloatingPoint())) + break; + + // Fast handling of 'identity' bitcasts. 
+ if (VTBits == SrcBits) + return ComputeNumSignBits(N0, DemandedElts, Depth + 1); + + // Bitcast 'large element' scalar/vector to 'small element' vector. + // TODO: Handle cases other than 'sign splat' when we have a use case. + // Requires handling of DemandedElts and Endianness. + if ((SrcBits % VTBits) == 0) { + assert(Op.getValueType().isVector() && "Expected bitcast to vector"); + Tmp = ComputeNumSignBits(N0, Depth + 1); + if (Tmp == SrcBits) + return VTBits; + } + break; + } + case ISD::SIGN_EXTEND: - case ISD::SIGN_EXTEND_VECTOR_INREG: Tmp = VTBits - Op.getOperand(0).getScalarValueSizeInBits(); - return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp; - + return ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1) + Tmp; case ISD::SIGN_EXTEND_INREG: // Max of the input and what this extends. Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarSizeInBits(); Tmp = VTBits-Tmp+1; - - Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1); + Tmp2 = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1); return std::max(Tmp, Tmp2); + case ISD::SIGN_EXTEND_VECTOR_INREG: { + SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + APInt DemandedSrcElts = DemandedElts.zext(SrcVT.getVectorNumElements()); + Tmp = VTBits - SrcVT.getScalarSizeInBits(); + return ComputeNumSignBits(Src, DemandedSrcElts, Depth+1) + Tmp; + } case ISD::SRA: Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1); // SRA X, C -> adds C sign bits. - if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) { + if (ConstantSDNode *C = + isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts)) { APInt ShiftVal = C->getAPIntValue(); ShiftVal += Tmp; Tmp = ShiftVal.uge(VTBits) ? VTBits : ShiftVal.getZExtValue(); } return Tmp; case ISD::SHL: - if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) { + if (ConstantSDNode *C = + isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts)) { // shl destroys sign bits. 
- Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1); if (C->getAPIntValue().uge(VTBits) || // Bad shift. C->getAPIntValue().uge(Tmp)) break; // Shifted all sign bits out. return Tmp - C->getZExtValue(); @@ -2987,9 +3166,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, case ISD::OR: case ISD::XOR: // NOT is handled here. // Logical binary ops preserve the number of sign bits at the worst. - Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1); if (Tmp != 1) { - Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); + Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth+1); FirstAnswer = std::min(Tmp, Tmp2); // We computed what we know about the sign bits as our first // answer. Now proceed to the generic code that uses @@ -2998,15 +3177,17 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, break; case ISD::SELECT: - Tmp = ComputeNumSignBits(Op.getOperand(1), Depth+1); + case ISD::VSELECT: + Tmp = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth+1); if (Tmp == 1) return 1; // Early out. - Tmp2 = ComputeNumSignBits(Op.getOperand(2), Depth+1); + Tmp2 = ComputeNumSignBits(Op.getOperand(2), DemandedElts, Depth+1); return std::min(Tmp, Tmp2); case ISD::SELECT_CC: - Tmp = ComputeNumSignBits(Op.getOperand(2), Depth+1); + Tmp = ComputeNumSignBits(Op.getOperand(2), DemandedElts, Depth+1); if (Tmp == 1) return 1; // Early out. 
- Tmp2 = ComputeNumSignBits(Op.getOperand(3), Depth+1); + Tmp2 = ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth+1); return std::min(Tmp, Tmp2); + case ISD::SMIN: case ISD::SMAX: case ISD::UMIN: @@ -3041,16 +3222,16 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, case ISD::ROTL: case ISD::ROTR: if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { - unsigned RotAmt = C->getZExtValue() & (VTBits-1); + unsigned RotAmt = C->getAPIntValue().urem(VTBits); // Handle rotate right by N like a rotate left by 32-N. if (Op.getOpcode() == ISD::ROTR) - RotAmt = (VTBits-RotAmt) & (VTBits-1); + RotAmt = (VTBits - RotAmt) % VTBits; // If we aren't rotating out all of the known-in sign bits, return the // number that are left. This handles rotl(sext(x), 1) for example. Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); - if (Tmp > RotAmt+1) return Tmp-RotAmt; + if (Tmp > (RotAmt + 1)) return (Tmp - RotAmt); } break; case ISD::ADD: @@ -3391,7 +3572,9 @@ static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT, ? DAG.getZExtOrTrunc(Op, DL, SVT) : DAG.getSExtOrTrunc(Op, DL, SVT); - return DAG.getBuildVector(VT, DL, Elts); + SDValue V = DAG.getBuildVector(VT, DL, Elts); + NewSDValueDbgMsg(V, "New node fold concat vectors: ", &DAG); + return V; } /// Gets or creates the specified node. 
@@ -3407,7 +3590,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT) { CSEMap.InsertNode(N, IP); InsertNode(N); - return SDValue(N, 0); + SDValue V = SDValue(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; } SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, @@ -3768,7 +3953,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } InsertNode(N); - return SDValue(N, 0); + SDValue V = SDValue(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; } static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1, @@ -3906,18 +4093,31 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of sync!"); EVT SVT = VT.getScalarType(); + EVT LegalSVT = SVT; + if (NewNodesMustHaveLegalTypes && LegalSVT.isInteger()) { + LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT); + if (LegalSVT.bitsLT(SVT)) + return SDValue(); + } SmallVector<SDValue, 4> Outputs; for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) { SDValue V1 = BV1->getOperand(I); SDValue V2 = BV2->getOperand(I); - // Avoid BUILD_VECTOR nodes that perform implicit truncation. - // FIXME: This is valid and could be handled by truncation. + if (SVT.isInteger()) { + if (V1->getValueType(0).bitsGT(SVT)) + V1 = getNode(ISD::TRUNCATE, DL, SVT, V1); + if (V2->getValueType(0).bitsGT(SVT)) + V2 = getNode(ISD::TRUNCATE, DL, SVT, V2); + } + if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT) return SDValue(); // Fold one vector element. SDValue ScalarResult = getNode(Opcode, DL, SVT, V1, V2); + if (LegalSVT != SVT) + ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult); // Scalar folding only succeeded if the result is a constant or UNDEF. 
if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant && @@ -3936,6 +4136,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, return getBuildVector(VT, SDLoc(), Outputs); } +// TODO: Merge with FoldConstantArithmetic SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef<SDValue> Ops, @@ -4027,7 +4228,9 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, ScalarResults.push_back(ScalarResult); } - return getBuildVector(VT, DL, ScalarResults); + SDValue V = getBuildVector(VT, DL, ScalarResults); + NewSDValueDbgMsg(V, "New node fold constant vector: ", this); + return V; } SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, @@ -4297,6 +4500,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2); } } + + // EXTRACT_VECTOR_ELT of v1iX EXTRACT_SUBVECTOR could be formed + // when vector types are scalarized and v1iX is legal. + // vextract (v1iX extract_subvector(vNiX, Idx)) -> vextract(vNiX,Idx) + if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && + N1.getValueType().getVectorNumElements() == 1) { + return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), + N1.getOperand(1)); + } break; case ISD::EXTRACT_ELEMENT: assert(N2C && (unsigned)N2C->getZExtValue() < 2 && "Bad EXTRACT_ELEMENT!"); @@ -4518,7 +4730,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } InsertNode(N); - return SDValue(N, 0); + SDValue V = SDValue(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; } SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, @@ -4553,8 +4767,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, return V; // Vector constant folding. 
SDValue Ops[] = {N1, N2, N3}; - if (SDValue V = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops)) + if (SDValue V = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops)) { + NewSDValueDbgMsg(V, "New node vector constant folding: ", this); return V; + } break; } case ISD::SELECT: @@ -4626,7 +4842,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } InsertNode(N); - return SDValue(N, 0); + SDValue V = SDValue(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; } SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, @@ -4882,8 +5100,8 @@ static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { // On Darwin, -Os means optimize for size without hurting performance, so // only really optimize for size when -Oz (MinSize) is used. if (MF.getTarget().getTargetTriple().isOSDarwin()) - return MF.getFunction()->optForMinSize(); - return MF.getFunction()->optForSize(); + return MF.getFunction().optForMinSize(); + return MF.getFunction().optForSize(); } static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, @@ -5558,21 +5776,15 @@ SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) { SDValue SelectionDAG::getMemIntrinsicNode( unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops, - EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, bool Vol, - bool ReadMem, bool WriteMem, unsigned Size) { + EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, + MachineMemOperand::Flags Flags, unsigned Size) { if (Align == 0) // Ensure that codegen never sees alignment 0 Align = getEVTAlignment(MemVT); - MachineFunction &MF = getMachineFunction(); - auto Flags = MachineMemOperand::MONone; - if (WriteMem) - Flags |= MachineMemOperand::MOStore; - if (ReadMem) - Flags |= MachineMemOperand::MOLoad; - if (Vol) - Flags |= MachineMemOperand::MOVolatile; if (!Size) Size = MemVT.getStoreSize(); + + MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = 
MF.getMachineMemOperand(PtrInfo, Flags, Size, Align); @@ -5597,6 +5809,8 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops); + ID.AddInteger(getSyntheticNodeSubclassData<MemIntrinsicSDNode>( + Opcode, dl.getIROrder(), VTList, MemVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { @@ -5622,7 +5836,8 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, /// MachinePointerInfo record from it. This is particularly useful because the /// code generator has many cases where it doesn't bother passing in a /// MachinePointerInfo to getLoad or getStore when it has "FI+Cst". -static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr, +static MachinePointerInfo InferPointerInfo(const MachinePointerInfo &Info, + SelectionDAG &DAG, SDValue Ptr, int64_t Offset = 0) { // If this is FI+Offset, we can model it. if (const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) @@ -5633,7 +5848,7 @@ static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr, if (Ptr.getOpcode() != ISD::ADD || !isa<ConstantSDNode>(Ptr.getOperand(1)) || !isa<FrameIndexSDNode>(Ptr.getOperand(0))) - return MachinePointerInfo(); + return Info; int FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex(); return MachinePointerInfo::getFixedStack( @@ -5645,14 +5860,15 @@ static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr, /// MachinePointerInfo record from it. This is particularly useful because the /// code generator has many cases where it doesn't bother passing in a /// MachinePointerInfo to getLoad or getStore when it has "FI+Cst". 
-static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr, +static MachinePointerInfo InferPointerInfo(const MachinePointerInfo &Info, + SelectionDAG &DAG, SDValue Ptr, SDValue OffsetOp) { // If the 'Offset' value isn't a constant, we can't handle this. if (ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(OffsetOp)) - return InferPointerInfo(DAG, Ptr, OffsetNode->getSExtValue()); + return InferPointerInfo(Info, DAG, Ptr, OffsetNode->getSExtValue()); if (OffsetOp.isUndef()) - return InferPointerInfo(DAG, Ptr); - return MachinePointerInfo(); + return InferPointerInfo(Info, DAG, Ptr); + return Info; } SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, @@ -5672,7 +5888,7 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, // If we don't have a PtrInfo, infer the trivial frame index case to simplify // clients. if (PtrInfo.V.isNull()) - PtrInfo = InferPointerInfo(*this, Ptr, Offset); + PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset); MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand( @@ -5791,7 +6007,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val, assert((MMOFlags & MachineMemOperand::MOLoad) == 0); if (PtrInfo.V.isNull()) - PtrInfo = InferPointerInfo(*this, Ptr); + PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr); MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand( @@ -5841,7 +6057,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, assert((MMOFlags & MachineMemOperand::MOLoad) == 0); if (PtrInfo.V.isNull()) - PtrInfo = InferPointerInfo(*this, Ptr); + PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr); MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand( @@ -6118,7 +6334,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } InsertNode(N); - return SDValue(N, 0); + 
SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; } SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, @@ -6171,7 +6389,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, createOperands(N, Ops); } InsertNode(N); - return SDValue(N, 0); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; } SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, @@ -6580,14 +6800,16 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) { unsigned OrigOpc = Node->getOpcode(); unsigned NewOpc; bool IsUnary = false; + bool IsTernary = false; switch (OrigOpc) { - default: + default: llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!"); case ISD::STRICT_FADD: NewOpc = ISD::FADD; break; case ISD::STRICT_FSUB: NewOpc = ISD::FSUB; break; case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; break; case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; break; case ISD::STRICT_FREM: NewOpc = ISD::FREM; break; + case ISD::STRICT_FMA: NewOpc = ISD::FMA; IsTernary = true; break; case ISD::STRICT_FSQRT: NewOpc = ISD::FSQRT; IsUnary = true; break; case ISD::STRICT_FPOW: NewOpc = ISD::FPOW; break; case ISD::STRICT_FPOWI: NewOpc = ISD::FPOWI; break; @@ -6614,10 +6836,14 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) { SDNode *Res = nullptr; if (IsUnary) Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1) }); + else if (IsTernary) + Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1), + Node->getOperand(2), + Node->getOperand(3)}); else Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1), Node->getOperand(2) }); - + // MorphNodeTo can operate in two ways: if an existing node with the // specified operands exists, it can just return it. Otherwise, it // updates the node in place to have the requested operands. 
@@ -6630,7 +6856,7 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) { RemoveDeadNode(Node); } - return Res; + return Res; } /// getMachineNode - These are used for target selectors to create a new node @@ -6794,32 +7020,125 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, /// getDbgValue - Creates a SDDbgValue node. /// /// SDNode -SDDbgValue *SelectionDAG::getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, - unsigned R, bool IsIndirect, uint64_t Off, +SDDbgValue *SelectionDAG::getDbgValue(DIVariable *Var, DIExpression *Expr, + SDNode *N, unsigned R, bool IsIndirect, const DebugLoc &DL, unsigned O) { assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); return new (DbgInfo->getAlloc()) - SDDbgValue(Var, Expr, N, R, IsIndirect, Off, DL, O); + SDDbgValue(Var, Expr, N, R, IsIndirect, DL, O); } /// Constant -SDDbgValue *SelectionDAG::getConstantDbgValue(MDNode *Var, MDNode *Expr, - const Value *C, uint64_t Off, +SDDbgValue *SelectionDAG::getConstantDbgValue(DIVariable *Var, + DIExpression *Expr, + const Value *C, const DebugLoc &DL, unsigned O) { assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); - return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, C, Off, DL, O); + return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, C, DL, O); } /// FrameIndex -SDDbgValue *SelectionDAG::getFrameIndexDbgValue(MDNode *Var, MDNode *Expr, - unsigned FI, uint64_t Off, +SDDbgValue *SelectionDAG::getFrameIndexDbgValue(DIVariable *Var, + DIExpression *Expr, unsigned FI, const DebugLoc &DL, unsigned O) { assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); - return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, FI, Off, DL, O); + return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, FI, DL, O); +} + +void SelectionDAG::transferDbgValues(SDValue From, SDValue To, + unsigned 
OffsetInBits, unsigned SizeInBits, + bool InvalidateDbg) { + SDNode *FromNode = From.getNode(); + SDNode *ToNode = To.getNode(); + assert(FromNode && ToNode && "Can't modify dbg values"); + + // PR35338 + // TODO: assert(From != To && "Redundant dbg value transfer"); + // TODO: assert(FromNode != ToNode && "Intranode dbg value transfer"); + if (From == To || FromNode == ToNode) + return; + + if (!FromNode->getHasDebugValue()) + return; + + SmallVector<SDDbgValue *, 2> ClonedDVs; + for (SDDbgValue *Dbg : GetDbgValues(FromNode)) { + if (Dbg->getKind() != SDDbgValue::SDNODE || Dbg->isInvalidated()) + continue; + + // TODO: assert(!Dbg->isInvalidated() && "Transfer of invalid dbg value"); + + // Just transfer the dbg value attached to From. + if (Dbg->getResNo() != From.getResNo()) + continue; + + DIVariable *Var = Dbg->getVariable(); + auto *Expr = Dbg->getExpression(); + // If a fragment is requested, update the expression. + if (SizeInBits) { + // When splitting a larger (e.g., sign-extended) value whose + // lower bits are described with an SDDbgValue, do not attempt + // to transfer the SDDbgValue to the upper bits. + if (auto FI = Expr->getFragmentInfo()) + if (OffsetInBits + SizeInBits > FI->SizeInBits) + continue; + auto Fragment = DIExpression::createFragmentExpression(Expr, OffsetInBits, + SizeInBits); + if (!Fragment) + continue; + Expr = *Fragment; + } + // Clone the SDDbgValue and move it to To. 
+ SDDbgValue *Clone = + getDbgValue(Var, Expr, ToNode, To.getResNo(), Dbg->isIndirect(), + Dbg->getDebugLoc(), Dbg->getOrder()); + ClonedDVs.push_back(Clone); + + if (InvalidateDbg) + Dbg->setIsInvalidated(); + } + + for (SDDbgValue *Dbg : ClonedDVs) + AddDbgValue(Dbg, ToNode, false); +} + +void SelectionDAG::salvageDebugInfo(SDNode &N) { + if (!N.getHasDebugValue()) + return; + for (auto DV : GetDbgValues(&N)) { + if (DV->isInvalidated()) + continue; + switch (N.getOpcode()) { + default: + break; + case ISD::ADD: + SDValue N0 = N.getOperand(0); + SDValue N1 = N.getOperand(1); + if (!isConstantIntBuildVectorOrConstantInt(N0) && + isConstantIntBuildVectorOrConstantInt(N1)) { + uint64_t Offset = N.getConstantOperandVal(1); + // Rewrite an ADD constant node into a DIExpression. Since we are + // performing arithmetic to compute the variable's *value* in the + // DIExpression, we need to mark the expression with a + // DW_OP_stack_value. + auto *DIExpr = DV->getExpression(); + DIExpr = DIExpression::prepend(DIExpr, DIExpression::NoDeref, Offset, + DIExpression::NoDeref, + DIExpression::WithStackValue); + SDDbgValue *Clone = + getDbgValue(DV->getVariable(), DIExpr, N0.getNode(), N0.getResNo(), + DV->isIndirect(), DV->getDebugLoc(), DV->getOrder()); + DV->setIsInvalidated(); + AddDbgValue(Clone, N0.getNode(), false); + DEBUG(dbgs() << "SALVAGE: Rewriting"; N0.getNode()->dumprFull(this); + dbgs() << " into " << *DIExpr << '\n'); + } + } + } } namespace { @@ -6859,7 +7178,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) { assert(From != To.getNode() && "Cannot replace uses of with self"); // Preserve Debug Values - TransferDbgValues(FromN, To); + transferDbgValues(FromN, To); // Iterate over all the existing uses of From. New uses will be added // to the beginning of the use list, which we avoid visiting. 
@@ -6918,7 +7237,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) { for (unsigned i = 0, e = From->getNumValues(); i != e; ++i) if (From->hasAnyUseOfValue(i)) { assert((i < To->getNumValues()) && "Invalid To location"); - TransferDbgValues(SDValue(From, i), SDValue(To, i)); + transferDbgValues(SDValue(From, i), SDValue(To, i)); } // Iterate over just the existing users of From. See the comments in @@ -6962,7 +7281,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) { // Preserve Debug Info. for (unsigned i = 0, e = From->getNumValues(); i != e; ++i) - TransferDbgValues(SDValue(From, i), *To); + transferDbgValues(SDValue(From, i), *To); // Iterate over just the existing users of From. See the comments in // the ReplaceAllUsesWith above. @@ -7009,7 +7328,7 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){ } // Preserve Debug Info. - TransferDbgValues(From, To); + transferDbgValues(From, To); // Iterate over just the existing users of From. See the comments in // the ReplaceAllUsesWith above. @@ -7087,7 +7406,7 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From, if (Num == 1) return ReplaceAllUsesOfValueWith(*From, *To); - TransferDbgValues(*From, *To); + transferDbgValues(*From, *To); // Read up all the uses and make records of them. This helps // processing new uses that are introduced during the @@ -7236,35 +7555,6 @@ void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) { DbgInfo->add(DB, SD, isParameter); } -/// TransferDbgValues - Transfer SDDbgValues. Called in replace nodes. 
-void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) { - if (From == To || !From.getNode()->getHasDebugValue()) - return; - SDNode *FromNode = From.getNode(); - SDNode *ToNode = To.getNode(); - ArrayRef<SDDbgValue *> DVs = GetDbgValues(FromNode); - SmallVector<SDDbgValue *, 2> ClonedDVs; - for (ArrayRef<SDDbgValue *>::iterator I = DVs.begin(), E = DVs.end(); - I != E; ++I) { - SDDbgValue *Dbg = *I; - // Only add Dbgvalues attached to same ResNo. - if (Dbg->getKind() == SDDbgValue::SDNODE && - Dbg->getSDNode() == From.getNode() && - Dbg->getResNo() == From.getResNo() && !Dbg->isInvalidated()) { - assert(FromNode != ToNode && - "Should not transfer Debug Values intranode"); - SDDbgValue *Clone = - getDbgValue(Dbg->getVariable(), Dbg->getExpression(), ToNode, - To.getResNo(), Dbg->isIndirect(), Dbg->getOffset(), - Dbg->getDebugLoc(), Dbg->getOrder()); - ClonedDVs.push_back(Clone); - Dbg->setIsInvalidated(); - } - } - for (SDDbgValue *I : ClonedDVs) - AddDbgValue(I, ToNode, false); -} - SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, SDValue NewMemOp) { assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node"); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index 0d69441ebb7f..544da362be69 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -1,5 +1,4 @@ -//===-- llvm/CodeGen/SelectionDAGAddressAnalysis.cpp ------- DAG Address -//Analysis ---*- C++ -*-===// +//==- llvm/CodeGen/SelectionDAGAddressAnalysis.cpp - DAG Address Analysis --==// // // The LLVM Compiler Infrastructure // @@ -7,15 +6,18 @@ // License. See LICENSE.TXT for details. 
// //===----------------------------------------------------------------------===// -// #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/Support/Casting.h" +#include <cstdint> -namespace llvm { +using namespace llvm; bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other, const SelectionDAG &DAG, int64_t &Off) { @@ -55,7 +57,7 @@ bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other, /// Parses tree in Ptr for base, index, offset addresses. BaseIndexOffset BaseIndexOffset::match(SDValue Ptr, const SelectionDAG &DAG) { // (((B + I*M) + c)) + c ... - SDValue Base = Ptr; + SDValue Base = DAG.getTargetLoweringInfo().unwrapAddress(Ptr); SDValue Index = SDValue(); int64_t Offset = 0; bool IsIndexSignExt = false; @@ -112,4 +114,3 @@ BaseIndexOffset BaseIndexOffset::match(SDValue Ptr, const SelectionDAG &DAG) { } return BaseIndexOffset(Base, Index, Offset, IsIndexSignExt); } -} // end namespace llvm diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 127312076207..71cb8cb78f6d 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1,4 +1,4 @@ -//===-- SelectionDAGBuilder.cpp - Selection-DAG building ------------------===// +//===- SelectionDAGBuilder.cpp - Selection-DAG building -------------------===// // // The LLVM Compiler Infrastructure // @@ -12,63 +12,113 @@ //===----------------------------------------------------------------------===// #include "SelectionDAGBuilder.h" -#include "SDNodeDbgValue.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include 
"llvm/ADT/None.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/Analysis.h" -#include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" -#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/ValueTypes.h" #include "llvm/CodeGen/WinEHFuncInfo.h" +#include "llvm/IR/Argument.h" +#include 
"llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" -#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" #include "llvm/IR/Statepoint.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" -#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <iterator> +#include <limits> +#include <numeric> +#include <tuple> #include <utility> +#include <vector> + using namespace llvm; #define DEBUG_TYPE "isel" @@ -78,11 +128,18 @@ using 
namespace llvm; static unsigned LimitFloatPrecision; static cl::opt<unsigned, true> -LimitFPPrecision("limit-float-precision", - cl::desc("Generate low-precision inline sequences " - "for some float libcalls"), - cl::location(LimitFloatPrecision), - cl::init(0)); + LimitFPPrecision("limit-float-precision", + cl::desc("Generate low-precision inline sequences " + "for some float libcalls"), + cl::location(LimitFloatPrecision), cl::Hidden, + cl::init(0)); + +static cl::opt<unsigned> SwitchPeelThreshold( + "switch-peel-threshold", cl::Hidden, cl::init(66), + cl::desc("Set the case probability threshold for peeling the case from a " + "switch statement. A value greater than 100 will void this " + "optimization")); + // Limit the width of DAG chains. This is important in general to prevent // DAG-based analysis from blowing up. For example, alias analysis and // load clustering may not complete in reasonable time. It is difficult to @@ -101,7 +158,7 @@ static const unsigned MaxParallelChains = 64; // True if the Value passed requires ABI mangling as it is a parameter to a // function or a return value from a function which is not an intrinsic. 
-static bool isABIRegCopy(const Value * V) { +static bool isABIRegCopy(const Value *V) { const bool IsRetInst = V && isa<ReturnInst>(V); const bool IsCallInst = V && isa<CallInst>(V); const bool IsInLineAsm = @@ -554,7 +611,6 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, const Value *V, bool IsABIRegCopy) { - EVT ValueVT = Val.getValueType(); assert(ValueVT.isVector() && "Not a vector"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -600,7 +656,6 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, Val = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); - } else { assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() && "lossy conversion of vector to scalar type"); @@ -677,8 +732,6 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, } } -RegsForValue::RegsForValue() { IsABIMangled = false; } - RegsForValue::RegsForValue(const SmallVector<unsigned, 4> ®s, MVT regvt, EVT valuevt, bool IsABIMangledValue) : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs), @@ -888,7 +941,24 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32); Ops.push_back(Res); - unsigned SP = TLI.getStackPointerRegisterToSaveRestore(); + if (Code == InlineAsm::Kind_Clobber) { + // Clobbers should always have a 1:1 mapping with registers, and may + // reference registers that have illegal (e.g. vector) types. Hence, we + // shouldn't try to apply any sort of splitting logic to them. 
+ assert(Regs.size() == RegVTs.size() && Regs.size() == ValueVTs.size() && + "No 1:1 mapping from clobbers to regs?"); + unsigned SP = TLI.getStackPointerRegisterToSaveRestore(); + (void)SP; + for (unsigned I = 0, E = ValueVTs.size(); I != E; ++I) { + Ops.push_back(DAG.getRegister(Regs[I], RegVTs[I])); + assert( + (Regs[I] != SP || + DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment()) && + "If we clobbered the stack pointer, MFI should know about it."); + } + return; + } + for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]); MVT RegisterVT = RegVTs[Value]; @@ -896,11 +966,6 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, assert(Reg < Regs.size() && "Mismatch in # registers expected"); unsigned TheReg = Regs[Reg++]; Ops.push_back(DAG.getRegister(TheReg, RegisterVT)); - - if (TheReg == SP && Code == InlineAsm::Kind_Clobber) { - // If we clobbered the stack pointer, MFI should know about it. - assert(DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment()); - } } } } @@ -1025,12 +1090,10 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, DIExpression *Expr = DI->getExpression(); assert(Variable->isValidLocationForIntrinsic(dl) && "Expected inlined-at fields to agree"); - uint64_t Offset = DI->getOffset(); SDDbgValue *SDV; if (Val.getNode()) { - if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, Offset, false, - Val)) { - SDV = getDbgValue(Val, Variable, Expr, Offset, dl, DbgSDNodeOrder); + if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, false, Val)) { + SDV = getDbgValue(Val, Variable, Expr, dl, DbgSDNodeOrder); DAG.AddDbgValue(SDV, Val.getNode(), false); } } else @@ -1409,7 +1472,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { // Leave Outs empty so that LowerReturn won't try to load return // registers the usual way. 
SmallVector<EVT, 1> PtrValueVTs; - ComputeValueVTs(TLI, DL, PointerType::getUnqual(F->getReturnType()), + ComputeValueVTs(TLI, DL, + F->getReturnType()->getPointerTo( + DAG.getDataLayout().getAllocaAddrSpace()), PtrValueVTs); SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), @@ -1421,22 +1486,15 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); - // An aggregate return value cannot wrap around the address space, so - // offsets to its parts don't wrap either. - SDNodeFlags Flags; - Flags.setNoUnsignedWrap(true); - SmallVector<SDValue, 4> Chains(NumValues); for (unsigned i = 0; i != NumValues; ++i) { - SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), - RetPtr.getValueType(), RetPtr, - DAG.getIntPtrConstant(Offsets[i], - getCurSDLoc()), - Flags); - Chains[i] = DAG.getStore(Chain, getCurSDLoc(), - SDValue(RetOp.getNode(), RetOp.getResNo() + i), - // FIXME: better loc info would be nice. - Add, MachinePointerInfo()); + // An aggregate return value cannot wrap around the address space, so + // offsets to its parts don't wrap either. + SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, Offsets[i]); + Chains[i] = DAG.getStore( + Chain, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i), + // FIXME: better loc info would be nice. 
+ Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction())); } Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), @@ -1515,9 +1573,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { EVT(TLI.getPointerTy(DL)))); } - bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); + bool isVarArg = DAG.getMachineFunction().getFunction().isVarArg(); CallingConv::ID CallConv = - DAG.getMachineFunction().getFunction()->getCallingConv(); + DAG.getMachineFunction().getFunction().getCallingConv(); Chain = DAG.getTargetLoweringInfo().LowerReturn( Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG); @@ -1623,7 +1681,6 @@ static bool InBlock(const Value *V, const BasicBlock *BB) { /// EmitBranchForMergedCondition - Helper method for FindMergedConditions. /// This function emits a branch and is used at the leaves of an OR or an /// AND operator tree. -/// void SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB, @@ -1659,7 +1716,7 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, } CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr, - TBB, FBB, CurBB, TProb, FProb); + TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb); SwitchCases.push_back(CB); return; } @@ -1668,7 +1725,7 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, // Create a CaseBlock record representing this branch. ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ; CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()), - nullptr, TBB, FBB, CurBB, TProb, FProb); + nullptr, TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb); SwitchCases.push_back(CB); } @@ -1712,7 +1769,7 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, // If this node is not part of the or/and tree, emit it as a branch. 
if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) || - BOpc != Opc || !BOp->hasOneUse() || + BOpc != unsigned(Opc) || !BOp->hasOneUse() || BOp->getParent() != CurBB->getBasicBlock() || !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) || !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) { @@ -1867,7 +1924,6 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // je foo // cmp D, E // jle foo - // if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { Instruction::BinaryOps Opcode = BOp->getOpcode(); if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() && @@ -1907,7 +1963,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // Create a CaseBlock record representing this branch. CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()), - nullptr, Succ0MBB, Succ1MBB, BrMBB); + nullptr, Succ0MBB, Succ1MBB, BrMBB, getCurSDLoc()); // Use visitSwitchCase to actually insert the fast branch sequence for this // cond branch. @@ -1920,7 +1976,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, MachineBasicBlock *SwitchBB) { SDValue Cond; SDValue CondLHS = getValue(CB.CmpLHS); - SDLoc dl = getCurSDLoc(); + SDLoc dl = CB.DL; // Build the setcc now. 
if (!CB.CmpMHS) { @@ -2054,7 +2110,7 @@ static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); MachineFunction &MF = DAG.getMachineFunction(); - Value *Global = TLI.getSDagStackGuard(*MF.getFunction()->getParent()); + Value *Global = TLI.getSDagStackGuard(*MF.getFunction().getParent()); MachineSDNode *Node = DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain); if (Global) { @@ -2088,15 +2144,18 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, SDValue Guard; SDLoc dl = getCurSDLoc(); SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy); - const Module &M = *ParentBB->getParent()->getFunction()->getParent(); + const Module &M = *ParentBB->getParent()->getFunction().getParent(); unsigned Align = DL->getPrefTypeAlignment(Type::getInt8PtrTy(M.getContext())); // Generate code to load the content of the guard slot. - SDValue StackSlot = DAG.getLoad( + SDValue GuardVal = DAG.getLoad( PtrTy, dl, DAG.getEntryNode(), StackSlotPtr, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align, MachineMemOperand::MOVolatile); + if (TLI.useStackGuardXorFP()) + GuardVal = TLI.emitStackGuardXorFP(DAG, GuardVal, dl); + // Retrieve guard check function, nullptr if instrumentation is inlined. if (const Value *GuardCheck = TLI.getSSPStackGuardCheck(M)) { // The target provides a guard check function to validate the guard value. @@ -2108,7 +2167,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Node = StackSlot; + Entry.Node = GuardVal; Entry.Ty = FnTy->getParamType(0); if (Fn->hasAttribute(1, Attribute::AttrKind::InReg)) Entry.IsInReg = true; @@ -2141,7 +2200,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, // Perform the comparison via a subtract/getsetcc. 
EVT VT = Guard.getValueType(); - SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, StackSlot); + SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, GuardVal); SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), @@ -2151,7 +2210,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, // If the sub is not 0, then we know the guard/stackslot do not equal, so // branch to failure MBB. SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, - MVT::Other, StackSlot.getOperand(0), + MVT::Other, GuardVal.getOperand(0), Cmp, DAG.getBasicBlock(SPD.getFailureMBB())); // Otherwise branch to success MBB. SDValue Br = DAG.getNode(ISD::BR, dl, @@ -2530,7 +2589,7 @@ static bool isVectorReductionOp(const User *I) { case Instruction::FAdd: case Instruction::FMul: if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst)) - if (FPOp->getFastMathFlags().unsafeAlgebra()) + if (FPOp->getFastMathFlags().isFast()) break; LLVM_FALLTHROUGH; default: @@ -2576,7 +2635,7 @@ static bool isVectorReductionOp(const User *I) { if (Inst->getOpcode() == OpCode || isa<PHINode>(U)) { if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst)) - if (!isa<PHINode>(FPOp) && !FPOp->getFastMathFlags().unsafeAlgebra()) + if (!isa<PHINode>(FPOp) && !FPOp->getFastMathFlags().isFast()) return false; UsersToVisit.push_back(U); } else if (const ShuffleVectorInst *ShufInst = @@ -2670,7 +2729,7 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { Flags.setNoInfs(FMF.noInfs()); Flags.setNoNaNs(FMF.noNaNs()); Flags.setNoSignedZeros(FMF.noSignedZeros()); - Flags.setUnsafeAlgebra(FMF.unsafeAlgebra()); + Flags.setUnsafeAlgebra(FMF.isFast()); SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(), Op1, Op2, Flags); @@ -2779,7 +2838,7 @@ void SelectionDAGBuilder::visitFCmp(const User &I) { // Check if the condition of the select has one use or two users that are both // selects with the same 
condition. static bool hasOnlySelectUsers(const Value *Cond) { - return all_of(Cond->users(), [](const Value *V) { + return llvm::all_of(Cond->users(), [](const Value *V) { return isa<SelectInst>(V); }); } @@ -3447,7 +3506,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { SDValue AllocSize = getValue(I.getArraySize()); - EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout()); + EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout(), DL.getAllocaAddrSpace()); if (AllocSize.getValueType() != IntPtr) AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr); @@ -3468,17 +3527,15 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { // an address inside an alloca. SDNodeFlags Flags; Flags.setNoUnsignedWrap(true); - AllocSize = DAG.getNode(ISD::ADD, dl, - AllocSize.getValueType(), AllocSize, - DAG.getIntPtrConstant(StackAlign - 1, dl), Flags); + AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize, + DAG.getConstant(StackAlign - 1, dl, IntPtr), Flags); // Mask out the low bits for alignment purposes. - AllocSize = DAG.getNode(ISD::AND, dl, - AllocSize.getValueType(), AllocSize, - DAG.getIntPtrConstant(~(uint64_t)(StackAlign - 1), - dl)); + AllocSize = + DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize, + DAG.getConstant(~(uint64_t)(StackAlign - 1), dl, IntPtr)); - SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align, dl) }; + SDValue Ops[] = {getRoot(), AllocSize, DAG.getConstant(Align, dl, IntPtr)}; SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other); SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops); setValue(&I, DSA); @@ -3807,18 +3864,16 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, // // When the first GEP operand is a single pointer - it is the uniform base we // are looking for. If first operand of the GEP is a splat vector - we -// extract the spalt value and use it as a uniform base. +// extract the splat value and use it as a uniform base. 
// In all other cases the function returns 'false'. -// static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index, SelectionDAGBuilder* SDB) { - SelectionDAG& DAG = SDB->DAG; LLVMContext &Context = *DAG.getContext(); assert(Ptr->getType()->isVectorTy() && "Uexpected pointer type"); const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr); - if (!GEP || GEP->getNumOperands() > 2) + if (!GEP) return false; const Value *GEPPtr = GEP->getPointerOperand(); @@ -3827,7 +3882,15 @@ static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index, else if (!(Ptr = getSplatValue(GEPPtr))) return false; - Value *IndexVal = GEP->getOperand(1); + unsigned FinalIndex = GEP->getNumOperands() - 1; + Value *IndexVal = GEP->getOperand(FinalIndex); + + // Ensure all the other indices are 0. + for (unsigned i = 1; i < FinalIndex; ++i) { + auto *C = dyn_cast<ConstantInt>(GEP->getOperand(i)); + if (!C || !C->isZero()) + return false; + } // The operands of the GEP may be defined in another basic block. // In this case we'll not find nodes for the operands. @@ -3837,13 +3900,6 @@ static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index, Base = SDB->getValue(Ptr); Index = SDB->getValue(IndexVal); - // Suppress sign extension. 
- if (SExtInst* Sext = dyn_cast<SExtInst>(IndexVal)) { - if (SDB->findValue(Sext->getOperand(0))) { - IndexVal = Sext->getOperand(0); - Index = SDB->getValue(IndexVal); - } - } if (!Index.getValueType().isVector()) { unsigned GEPWidth = GEP->getType()->getVectorNumElements(); EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth); @@ -4082,7 +4138,8 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); - if (I.getAlignment() < VT.getSizeInBits() / 8) + if (!TLI.supportsUnalignedAtomics() && + I.getAlignment() < VT.getStoreSize()) report_fatal_error("Cannot generate unaligned atomic load"); MachineMemOperand *MMO = @@ -4118,7 +4175,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getValueOperand()->getType()); - if (I.getAlignment() < VT.getSizeInBits() / 8) + if (I.getAlignment() < VT.getStoreSize()) report_fatal_error("Cannot generate unaligned atomic store"); SDValue OutChain = @@ -4157,7 +4214,9 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // Info is set by getTgtMemInstrinsic TargetLowering::IntrinsicInfo Info; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic); + bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, + DAG.getMachineFunction(), + Intrinsic); // Add the intrinsic ID as an integer operand if it's not a target intrinsic. 
if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || @@ -4183,11 +4242,10 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, SDValue Result; if (IsTgtIntrinsic) { // This is target intrinsic that touches memory - Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), - VTs, Ops, Info.memVT, - MachinePointerInfo(Info.ptrVal, Info.offset), - Info.align, Info.vol, - Info.readMem, Info.writeMem, Info.size); + Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, + Ops, Info.memVT, + MachinePointerInfo(Info.ptrVal, Info.offset), Info.align, + Info.flags, Info.size); } else if (!HasChain) { Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops); } else if (!I.getType()->isVoidTy()) { @@ -4370,7 +4428,6 @@ static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, /// limited-precision mode. static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { - // TODO: What fast-math-flags should be set on the floating-point nodes? if (Op.getValueType() == MVT::f32 && @@ -4469,7 +4526,6 @@ static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, /// limited-precision mode. static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { - // TODO: What fast-math-flags should be set on the floating-point nodes? if (Op.getValueType() == MVT::f32 && @@ -4567,7 +4623,6 @@ static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, /// limited-precision mode. static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { - // TODO: What fast-math-flags should be set on the floating-point nodes? if (Op.getValueType() == MVT::f32 && @@ -4695,7 +4750,6 @@ static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS, return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS); } - /// ExpandPowI - Expand a llvm.powi intrinsic. 
static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS, SelectionDAG &DAG) { @@ -4712,8 +4766,8 @@ static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS, if (Val == 0) return DAG.getConstantFP(1.0, DL, LHS.getValueType()); - const Function *F = DAG.getMachineFunction().getFunction(); - if (!F->optForSize() || + const Function &F = DAG.getMachineFunction().getFunction(); + if (!F.optForSize() || // If optimizing for size, don't insert too many multiplies. // This inserts up to 5 multiplies. countPopulation(Val) + Log2_32(Val) < 7) { @@ -4766,12 +4820,12 @@ static unsigned getUnderlyingArgReg(const SDValue &N) { } } -/// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function -/// argument, create the corresponding DBG_VALUE machine instruction for it now. -/// At the end of instruction selection, they will be inserted to the entry BB. +/// If the DbgValueInst is a dbg_value of a function argument, create the +/// corresponding DBG_VALUE machine instruction for it now. At the end of +/// instruction selection, they will be inserted to the entry BB. bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( const Value *V, DILocalVariable *Variable, DIExpression *Expr, - DILocation *DL, int64_t Offset, bool IsDbgDeclare, const SDValue &N) { + DILocation *DL, bool IsDbgDeclare, const SDValue &N) { const Argument *Arg = dyn_cast<Argument>(V); if (!Arg) return false; @@ -4779,17 +4833,11 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( MachineFunction &MF = DAG.getMachineFunction(); const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); - // Ignore inlined function arguments here. - // - // FIXME: Should we be checking DL->inlinedAt() to determine this? - if (!Variable->getScope()->getSubprogram()->describes(MF.getFunction())) - return false; - bool IsIndirect = false; Optional<MachineOperand> Op; // Some arguments' frame index is recorded during argument lowering. 
int FI = FuncInfo.getArgumentFrameIndex(Arg); - if (FI != INT_MAX) + if (FI != std::numeric_limits<int>::max()) Op = MachineOperand::CreateFI(FI); if (!Op && N.getNode()) { @@ -4806,22 +4854,48 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( } } + if (!Op && N.getNode()) + // Check if frame index is available. + if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode())) + if (FrameIndexSDNode *FINode = + dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) + Op = MachineOperand::CreateFI(FINode->getIndex()); + if (!Op) { // Check if ValueMap has reg number. DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); if (VMI != FuncInfo.ValueMap.end()) { + const auto &TLI = DAG.getTargetLoweringInfo(); + RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second, + V->getType(), isABIRegCopy(V)); + unsigned NumRegs = + std::accumulate(RFV.RegCount.begin(), RFV.RegCount.end(), 0); + if (NumRegs > 1) { + unsigned I = 0; + unsigned Offset = 0; + auto RegisterVT = RFV.RegVTs.begin(); + for (auto RegCount : RFV.RegCount) { + unsigned RegisterSize = (RegisterVT++)->getSizeInBits(); + for (unsigned E = I + RegCount; I != E; ++I) { + // The vregs are guaranteed to be allocated in sequence. + Op = MachineOperand::CreateReg(VMI->second + I, false); + auto FragmentExpr = DIExpression::createFragmentExpression( + Expr, Offset, RegisterSize); + if (!FragmentExpr) + continue; + FuncInfo.ArgDbgValues.push_back( + BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare, + Op->getReg(), Variable, *FragmentExpr)); + Offset += RegisterSize; + } + } + return true; + } Op = MachineOperand::CreateReg(VMI->second, false); IsIndirect = IsDbgDeclare; } } - if (!Op && N.getNode()) - // Check if frame index is available. 
- if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode())) - if (FrameIndexSDNode *FINode = - dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) - Op = MachineOperand::CreateFI(FINode->getIndex()); - if (!Op) return false; @@ -4830,12 +4904,12 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( if (Op->isReg()) FuncInfo.ArgDbgValues.push_back( BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, - Op->getReg(), Offset, Variable, Expr)); + Op->getReg(), Variable, Expr)); else FuncInfo.ArgDbgValues.push_back( BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE)) .add(*Op) - .addImm(Offset) + .addImm(0) .addMetadata(Variable) .addMetadata(Expr)); @@ -4845,18 +4919,18 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( /// Return the appropriate SDDbgValue based on N. SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N, DILocalVariable *Variable, - DIExpression *Expr, int64_t Offset, + DIExpression *Expr, const DebugLoc &dl, unsigned DbgSDNodeOrder) { if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) { // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe // stack slot locations as such instead of as indirectly addressed // locations. 
- return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(), 0, dl, + return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(), dl, DbgSDNodeOrder); } - return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false, - Offset, dl, DbgSDNodeOrder); + return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false, dl, + DbgSDNodeOrder); } // VisualStudio defines setjmp as _setjmp @@ -4971,8 +5045,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } case Intrinsic::memcpy_element_unordered_atomic: { - const ElementUnorderedAtomicMemCpyInst &MI = - cast<ElementUnorderedAtomicMemCpyInst>(I); + const AtomicMemCpyInst &MI = cast<AtomicMemCpyInst>(I); SDValue Dst = getValue(MI.getRawDest()); SDValue Src = getValue(MI.getRawSource()); SDValue Length = getValue(MI.getLength()); @@ -5010,7 +5083,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } case Intrinsic::memmove_element_unordered_atomic: { - auto &MI = cast<ElementUnorderedAtomicMemMoveInst>(I); + auto &MI = cast<AtomicMemMoveInst>(I); SDValue Dst = getValue(MI.getRawDest()); SDValue Src = getValue(MI.getRawSource()); SDValue Length = getValue(MI.getLength()); @@ -5048,7 +5121,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } case Intrinsic::memset_element_unordered_atomic: { - auto &MI = cast<ElementUnorderedAtomicMemSetInst>(I); + auto &MI = cast<AtomicMemSetInst>(I); SDValue Dst = getValue(MI.getRawDest()); SDValue Val = getValue(MI.getValue()); SDValue Length = getValue(MI.getLength()); @@ -5086,30 +5159,48 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.setRoot(CallResult.second); return nullptr; } + case Intrinsic::dbg_addr: case Intrinsic::dbg_declare: { - const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); + const DbgInfoIntrinsic &DI = cast<DbgInfoIntrinsic>(I); DILocalVariable 
*Variable = DI.getVariable(); DIExpression *Expression = DI.getExpression(); - const Value *Address = DI.getAddress(); assert(Variable && "Missing variable"); - if (!Address) { - DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); - return nullptr; - } // Check if address has undef value. - if (isa<UndefValue>(Address) || + const Value *Address = DI.getVariableLocation(); + if (!Address || isa<UndefValue>(Address) || (Address->use_empty() && !isa<Argument>(Address))) { DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); return nullptr; } - // Byval arguments with frame indices were already handled after argument - // lowering and before isel. - const auto *Arg = - dyn_cast<Argument>(Address->stripInBoundsConstantOffsets()); - if (Arg && FuncInfo.getArgumentFrameIndex(Arg) != INT_MAX) + bool isParameter = Variable->isParameter() || isa<Argument>(Address); + + // Check if this variable can be described by a frame index, typically + // either as a static alloca or a byval parameter. + int FI = std::numeric_limits<int>::max(); + if (const auto *AI = + dyn_cast<AllocaInst>(Address->stripInBoundsConstantOffsets())) { + if (AI->isStaticAlloca()) { + auto I = FuncInfo.StaticAllocaMap.find(AI); + if (I != FuncInfo.StaticAllocaMap.end()) + FI = I->second; + } + } else if (const auto *Arg = dyn_cast<Argument>( + Address->stripInBoundsConstantOffsets())) { + FI = FuncInfo.getArgumentFrameIndex(Arg); + } + + // llvm.dbg.addr is control dependent and always generates indirect + // DBG_VALUE instructions. llvm.dbg.declare is handled as a frame index in + // the MachineFunction variable table. 
+ if (FI != std::numeric_limits<int>::max()) { + if (Intrinsic == Intrinsic::dbg_addr) + DAG.AddDbgValue(DAG.getFrameIndexDbgValue(Variable, Expression, FI, dl, + SDNodeOrder), + getRoot().getNode(), isParameter); return nullptr; + } SDValue &N = NodeMap[Address]; if (!N.getNode() && isa<Argument>(Address)) @@ -5120,26 +5211,25 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) Address = BCI->getOperand(0); // Parameters are handled specially. - bool isParameter = Variable->isParameter() || isa<Argument>(Address); auto FINode = dyn_cast<FrameIndexSDNode>(N.getNode()); if (isParameter && FINode) { // Byval parameter. We have a frame index at this point. SDV = DAG.getFrameIndexDbgValue(Variable, Expression, - FINode->getIndex(), 0, dl, SDNodeOrder); + FINode->getIndex(), dl, SDNodeOrder); } else if (isa<Argument>(Address)) { // Address is an argument, so try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. - EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, true, N); + EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true, N); return nullptr; } else { SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), - true, 0, dl, SDNodeOrder); + true, dl, SDNodeOrder); } DAG.AddDbgValue(SDV, N.getNode(), isParameter); } else { // If Address is an argument then try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. 
- if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, true, + if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true, N)) { DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); } @@ -5152,15 +5242,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DILocalVariable *Variable = DI.getVariable(); DIExpression *Expression = DI.getExpression(); - uint64_t Offset = DI.getOffset(); const Value *V = DI.getValue(); if (!V) return nullptr; SDDbgValue *SDV; if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) { - SDV = DAG.getConstantDbgValue(Variable, Expression, V, Offset, dl, - SDNodeOrder); + SDV = DAG.getConstantDbgValue(Variable, Expression, V, dl, SDNodeOrder); DAG.AddDbgValue(SDV, nullptr, false); return nullptr; } @@ -5171,10 +5259,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (!N.getNode() && isa<Argument>(V)) // Check unused arguments map. N = UnusedArgNodeMap[V]; if (N.getNode()) { - if (EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset, false, - N)) + if (EmitFuncArgumentDbgValue(V, Variable, Expression, dl, false, N)) return nullptr; - SDV = getDbgValue(N, Variable, Expression, Offset, dl, SDNodeOrder); + SDV = getDbgValue(N, Variable, Expression, dl, SDNodeOrder); DAG.AddDbgValue(SDV, N.getNode(), false); return nullptr; } @@ -5213,12 +5300,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::eh_unwind_init: DAG.getMachineFunction().setCallsUnwindInit(true); return nullptr; - case Intrinsic::eh_dwarf_cfa: { + case Intrinsic::eh_dwarf_cfa: setValue(&I, DAG.getNode(ISD::EH_DWARF_CFA, sdl, TLI.getPointerTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); return nullptr; - } case Intrinsic::eh_sjlj_callsite: { MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0)); @@ -5247,17 +5333,14 @@ 
SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.setRoot(Op.getValue(1)); return nullptr; } - case Intrinsic::eh_sjlj_longjmp: { + case Intrinsic::eh_sjlj_longjmp: DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other, getRoot(), getValue(I.getArgOperand(0)))); return nullptr; - } - case Intrinsic::eh_sjlj_setup_dispatch: { + case Intrinsic::eh_sjlj_setup_dispatch: DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other, getRoot())); return nullptr; - } - case Intrinsic::masked_gather: visitMaskedGather(I); return nullptr; @@ -5430,6 +5513,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::experimental_constrained_fmul: case Intrinsic::experimental_constrained_fdiv: case Intrinsic::experimental_constrained_frem: + case Intrinsic::experimental_constrained_fma: case Intrinsic::experimental_constrained_sqrt: case Intrinsic::experimental_constrained_pow: case Intrinsic::experimental_constrained_powi: @@ -5534,11 +5618,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.setRoot(Res.getValue(1)); return nullptr; } - case Intrinsic::stackrestore: { + case Intrinsic::stackrestore: Res = getValue(I.getArgOperand(0)); DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res)); return nullptr; - } case Intrinsic::get_dynamic_area_offset: { SDValue Op = getRoot(); EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); @@ -5557,7 +5640,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::stackguard: { EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); MachineFunction &MF = DAG.getMachineFunction(); - const Module &M = *MF.getFunction()->getParent(); + const Module &M = *MF.getFunction().getParent(); SDValue Chain = getRoot(); if (TLI.useLoadStackGuardNode()) { Res = getLoadStackGuard(DAG, sdl, Chain); @@ -5568,6 +5651,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const 
CallInst &I, unsigned Intrinsic) { MachinePointerInfo(Global, 0), Align, MachineMemOperand::MOVolatile); } + if (TLI.useStackGuardXorFP()) + Res = TLI.emitStackGuardXorFP(DAG, Res, sdl); DAG.setRoot(Chain); setValue(&I, Res); return nullptr; @@ -5624,9 +5709,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; case Intrinsic::assume: case Intrinsic::var_annotation: - // Discard annotate attributes and assumptions + case Intrinsic::sideeffect: + // Discard annotate attributes, assumptions, and artificial side-effects. return nullptr; + case Intrinsic::codeview_annotation: { + // Emit a label associated with this metadata. + MachineFunction &MF = DAG.getMachineFunction(); + MCSymbol *Label = + MF.getMMI().getContext().createTempSymbol("annotation", true); + Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(0))->getMetadata(); + MF.addCodeViewAnnotation(Label, cast<MDNode>(MD)); + Res = DAG.getLabelNode(ISD::ANNOTATION_LABEL, sdl, getRoot(), Label); + DAG.setRoot(Res); + return nullptr; + } + case Intrinsic::init_trampoline: { const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts()); @@ -5643,17 +5741,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.setRoot(Res); return nullptr; } - case Intrinsic::adjust_trampoline: { + case Intrinsic::adjust_trampoline: setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl, TLI.getPointerTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); return nullptr; - } case Intrinsic::gcroot: { - MachineFunction &MF = DAG.getMachineFunction(); - const Function *F = MF.getFunction(); - (void)F; - assert(F->hasGC() && + assert(DAG.getMachineFunction().getFunction().hasGC() && "only valid in functions with gc specified, enforced by Verifier"); assert(GFI && "implied by previous"); const Value *Alloca = I.getArgOperand(0)->stripPointerCasts(); @@ -5670,11 +5764,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, 
unsigned Intrinsic) { setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32)); return nullptr; - case Intrinsic::expect: { + case Intrinsic::expect: // Just replace __builtin_expect(exp, c) with EXP. setValue(&I, getValue(I.getArgOperand(0))); return nullptr; - } case Intrinsic::debugtrap: case Intrinsic::trap: { @@ -5728,6 +5821,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::prefetch: { SDValue Ops[5]; unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); + auto Flags = rw == 0 ? MachineMemOperand::MOLoad :MachineMemOperand::MOStore; Ops[0] = getRoot(); Ops[1] = getValue(I.getArgOperand(0)); Ops[2] = getValue(I.getArgOperand(1)); @@ -5738,9 +5832,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { EVT::getIntegerVT(*Context, 8), MachinePointerInfo(I.getArgOperand(0)), 0, /* align */ - false, /* volatile */ - rw==0, /* read */ - rw==1)); /* write */ + Flags)); return nullptr; } case Intrinsic::lifetime_start: @@ -5792,27 +5884,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::donothing: // ignore return nullptr; - case Intrinsic::experimental_stackmap: { + case Intrinsic::experimental_stackmap: visitStackmap(I); return nullptr; - } case Intrinsic::experimental_patchpoint_void: - case Intrinsic::experimental_patchpoint_i64: { + case Intrinsic::experimental_patchpoint_i64: visitPatchpoint(&I); return nullptr; - } - case Intrinsic::experimental_gc_statepoint: { + case Intrinsic::experimental_gc_statepoint: LowerStatepoint(ImmutableStatepoint(&I)); return nullptr; - } - case Intrinsic::experimental_gc_result: { + case Intrinsic::experimental_gc_result: visitGCResult(cast<GCResultInst>(I)); return nullptr; - } - case Intrinsic::experimental_gc_relocate: { + case Intrinsic::experimental_gc_relocate: visitGCRelocate(cast<GCRelocateInst>(I)); return nullptr; - } case Intrinsic::instrprof_increment: 
llvm_unreachable("instrprof failed to lower an increment"); case Intrinsic::instrprof_value_profile: @@ -5851,7 +5938,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Get the symbol that defines the frame offset. auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts()); auto *Idx = cast<ConstantInt>(I.getArgOperand(2)); - unsigned IdxVal = unsigned(Idx->getLimitedValue(INT_MAX)); + unsigned IdxVal = + unsigned(Idx->getLimitedValue(std::numeric_limits<int>::max())); MCSymbol *FrameAllocSym = MF.getMMI().getContext().getOrCreateFrameAllocSymbol( GlobalValue::dropLLVMManglingEscape(Fn->getName()), IdxVal); @@ -5932,12 +6020,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::experimental_vector_reduce_umax: case Intrinsic::experimental_vector_reduce_umin: case Intrinsic::experimental_vector_reduce_fmax: - case Intrinsic::experimental_vector_reduce_fmin: { + case Intrinsic::experimental_vector_reduce_fmin: visitVectorReduce(I, Intrinsic); return nullptr; } - - } } void SelectionDAGBuilder::visitConstrainedFPIntrinsic( @@ -5961,6 +6047,9 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( case Intrinsic::experimental_constrained_frem: Opcode = ISD::STRICT_FREM; break; + case Intrinsic::experimental_constrained_fma: + Opcode = ISD::STRICT_FMA; + break; case Intrinsic::experimental_constrained_sqrt: Opcode = ISD::STRICT_FSQRT; break; @@ -6007,10 +6096,15 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( SDVTList VTs = DAG.getVTList(ValueVTs); SDValue Result; if (FPI.isUnaryOp()) - Result = DAG.getNode(Opcode, sdl, VTs, + Result = DAG.getNode(Opcode, sdl, VTs, { Chain, getValue(FPI.getArgOperand(0)) }); + else if (FPI.isTernaryOp()) + Result = DAG.getNode(Opcode, sdl, VTs, + { Chain, getValue(FPI.getArgOperand(0)), + getValue(FPI.getArgOperand(1)), + getValue(FPI.getArgOperand(2)) }); else - Result = DAG.getNode(Opcode, sdl, VTs, + Result = DAG.getNode(Opcode, 
sdl, VTs, { Chain, getValue(FPI.getArgOperand(0)), getValue(FPI.getArgOperand(1)) }); @@ -6081,7 +6175,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, if (MF.hasEHFunclets()) { assert(CLI.CS); WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo(); - EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS->getInstruction()), + EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS.getInstruction()), BeginLabel, EndLabel); } else { MF.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel); @@ -6189,7 +6283,6 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, SelectionDAGBuilder &Builder) { - // Check to see if this load can be trivially constant folded, e.g. if the // input is from a string literal. if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) { @@ -6553,10 +6646,10 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { // Check for well-known libc/libm calls. If the function is internal, it // can't be a library call. Don't do the check if marked as nobuiltin for - // some reason. + // some reason or the call site requires strict floating point semantics. LibFunc Func; - if (!I.isNoBuiltin() && !F->hasLocalLinkage() && F->hasName() && - LibInfo->getLibFunc(*F, Func) && + if (!I.isNoBuiltin() && !I.isStrictFP() && !F->hasLocalLinkage() && + F->hasName() && LibInfo->getLibFunc(*F, Func) && LibInfo->hasOptimizedCodeGen(Func)) { switch (Func) { default: break; @@ -6735,7 +6828,7 @@ public: RegsForValue AssignedRegs; explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info) - : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr,0) { + : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr, 0) { } /// Whether or not this operand accesses memory @@ -6767,7 +6860,7 @@ public: // If this is an indirect operand, the operand is a pointer to the // accessed type. 
if (isIndirect) { - llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy); + PointerType *PtrTy = dyn_cast<PointerType>(OpTy); if (!PtrTy) report_fatal_error("Indirect operand for inline asm not a pointer!"); OpTy = PtrTy->getElementType(); @@ -6799,7 +6892,7 @@ public: } }; -typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector; +using SDISelAsmOperandInfoVector = SmallVector<SDISelAsmOperandInfo, 16>; } // end anonymous namespace @@ -6879,7 +6972,6 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location, /// allocation. This produces generally horrible, but correct, code. /// /// OpInfo describes the operand. -/// static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, const SDLoc &DL, SDISelAsmOperandInfo &OpInfo) { @@ -7013,6 +7105,8 @@ static bool createVirtualRegs(SmallVector<unsigned, 4> &Regs, unsigned NumRegs, return true; } +namespace { + class ExtraFlags { unsigned Flags = 0; @@ -7028,7 +7122,7 @@ public: Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect; } - void update(const llvm::TargetLowering::AsmOperandInfo &OpInfo) { + void update(const TargetLowering::AsmOperandInfo &OpInfo) { // Ideally, we would only check against memory constraints. However, the // meaning of an Other constraint can be target-specific and we can't easily // reason about it. Therefore, be conservative and set MayLoad/MayStore @@ -7047,8 +7141,9 @@ public: unsigned get() const { return Flags; } }; +} // end anonymous namespace + /// visitInlineAsm - Handle a call to an InlineAsm object. -/// void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); @@ -7207,13 +7302,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { RegsForValue RetValRegs; // IndirectStoresToEmit - The set of stores to emit after the inline asm node. 
- std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit; + std::vector<std::pair<RegsForValue, Value *>> IndirectStoresToEmit; for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; switch (OpInfo.Type) { - case InlineAsm::isOutput: { + case InlineAsm::isOutput: if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass && OpInfo.ConstraintType != TargetLowering::C_Register) { // Memory output, or 'other' output (e.g. 'X' constraint). @@ -7264,7 +7359,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { : InlineAsm::Kind_RegDef, false, 0, getCurSDLoc(), DAG, AsmNodeOperands); break; - } + case InlineAsm::isInput: { SDValue InOperandVal = OpInfo.CallOperand; @@ -7397,7 +7492,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { dl, DAG, AsmNodeOperands); break; } - case InlineAsm::isClobber: { + case InlineAsm::isClobber: // Add the clobbered value to the operand list, so that the register // allocator is aware that the physreg got clobbered. if (!OpInfo.AssignedRegs.Regs.empty()) @@ -7406,7 +7501,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { AsmNodeOperands); break; } - } } // Finish up input operands. Set the input chain and add the flag last. @@ -7453,7 +7547,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { return; } - std::vector<std::pair<SDValue, const Value *> > StoresToEmit; + std::vector<std::pair<SDValue, const Value *>> StoresToEmit; // Process indirect outputs, first output all of the flagged copies out of // physregs. 
@@ -7865,13 +7959,13 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I, switch (Intrinsic) { case Intrinsic::experimental_vector_reduce_fadd: - if (FMF.unsafeAlgebra()) + if (FMF.isFast()) Res = DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2); else Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2); break; case Intrinsic::experimental_vector_reduce_fmul: - if (FMF.unsafeAlgebra()) + if (FMF.isFast()) Res = DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2); else Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2); @@ -7903,14 +7997,12 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I, case Intrinsic::experimental_vector_reduce_umin: Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1); break; - case Intrinsic::experimental_vector_reduce_fmax: { + case Intrinsic::experimental_vector_reduce_fmax: Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1, SDFlags); break; - } - case Intrinsic::experimental_vector_reduce_fmin: { + case Intrinsic::experimental_vector_reduce_fmin: Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags); break; - } default: llvm_unreachable("Unhandled vector reduce intrinsic"); } @@ -7955,10 +8047,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { uint64_t Offset = OldOffsets[i]; MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), RetVT); unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), RetVT); - unsigned RegisterVTSize = RegisterVT.getSizeInBits(); + unsigned RegisterVTByteSZ = RegisterVT.getSizeInBits() / 8; RetTys.append(NumRegs, RegisterVT); for (unsigned j = 0; j != NumRegs; ++j) - Offsets.push_back(Offset + j * RegisterVTSize); + Offsets.push_back(Offset + j * RegisterVTByteSZ); } } @@ -7996,6 +8088,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Entry.IsSwiftError = false; Entry.Alignment = Align; CLI.getArgs().insert(CLI.getArgs().begin(), Entry); + CLI.NumFixedArgs += 1; CLI.RetTy = 
Type::getVoidTy(CLI.RetTy->getContext()); // sret demotion isn't compatible with tail-calls, since the sret argument @@ -8148,8 +8241,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { } getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, - CLI.CS ? CLI.CS->getInstruction() : nullptr, ExtendKind, - true); + CLI.CS.getInstruction(), ExtendKind, true); for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 @@ -8209,7 +8301,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // The instruction result is the result of loading from the // hidden sret parameter. SmallVector<EVT, 1> PVTs; - Type *PtrRetTy = PointerType::getUnqual(OrigRetTy); + Type *PtrRetTy = OrigRetTy->getPointerTo(DL.getAllocaAddrSpace()); ComputeValueVTs(*this, DL, PtrRetTy, PVTs); assert(PVTs.size() == 1 && "Pointers should fit in one register"); @@ -8326,9 +8418,9 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { return true; } -typedef DenseMap<const Argument *, - std::pair<const AllocaInst *, const StoreInst *>> - ArgCopyElisionMapTy; +using ArgCopyElisionMapTy = + DenseMap<const Argument *, + std::pair<const AllocaInst *, const StoreInst *>>; /// Scan the entry block of the function in FuncInfo for arguments that look /// like copies into a local alloca. Record any copied arguments in @@ -8503,7 +8595,9 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // Put in an sret pointer parameter before all the other parameters. SmallVector<EVT, 1> ValueVTs; ComputeValueVTs(*TLI, DAG.getDataLayout(), - PointerType::getUnqual(F.getReturnType()), ValueVTs); + F.getReturnType()->getPointerTo( + DAG.getDataLayout().getAllocaAddrSpace()), + ValueVTs); // NOTE: Assuming that a pointer will never break down to more than one VT // or one register. @@ -8657,7 +8751,9 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // from the sret argument into it. 
SmallVector<EVT, 1> ValueVTs; ComputeValueVTs(*TLI, DAG.getDataLayout(), - PointerType::getUnqual(F.getReturnType()), ValueVTs); + F.getReturnType()->getPointerTo( + DAG.getDataLayout().getAllocaAddrSpace()), + ValueVTs); MVT VT = ValueVTs[0].getSimpleVT(); MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT); Optional<ISD::NodeType> AssertOp = None; @@ -8749,11 +8845,19 @@ void SelectionDAGISel::LowerArguments(const Function &F) { SDB->setValue(&Arg, Res); if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) { + // We want to associate the argument with the frame index, among + // involved operands, that correspond to the lowest address. The + // getCopyFromParts function, called earlier, is swapping the order of + // the operands to BUILD_PAIR depending on endianness. The result of + // that swapping is that the least significant bits of the argument will + // be in the first operand of the BUILD_PAIR node, and the most + // significant bits will be in the second operand. + unsigned LowAddressOp = DAG.getDataLayout().isBigEndian() ? 1 : 0; if (LoadSDNode *LNode = - dyn_cast<LoadSDNode>(Res.getOperand(0).getNode())) + dyn_cast<LoadSDNode>(Res.getOperand(LowAddressOp).getNode())) if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) - FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex()); + FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex()); } // Update the SwiftErrorVRegDefMap. @@ -8813,7 +8917,6 @@ void SelectionDAGISel::LowerArguments(const Function &F) { /// directly add them, because expansion might result in multiple MBB's for one /// BB. As such, the start of the BB might correspond to a different MBB than /// the end. 
-/// void SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { const TerminatorInst *TI = LLVMBB->getTerminator(); @@ -9249,10 +9352,12 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, BitTestInfo BTI; std::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) { - // Sort by probability first, number of bits second. + // Sort by probability first, number of bits second, bit mask third. if (a.ExtraProb != b.ExtraProb) return a.ExtraProb > b.ExtraProb; - return a.Bits > b.Bits; + if (a.Bits != b.Bits) + return a.Bits > b.Bits; + return a.Mask < b.Mask; }); for (auto &CB : CBV) { @@ -9441,10 +9546,15 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, } if (TM.getOptLevel() != CodeGenOpt::None) { - // Order cases by probability so the most likely case will be checked first. + // Here, we order cases by probability so the most likely case will be + // checked first. However, two clusters can have the same probability in + // which case their relative ordering is non-deterministic. So we use Low + // as a tie-breaker as clusters are guaranteed to never overlap. std::sort(W.FirstCluster, W.LastCluster + 1, [](const CaseCluster &a, const CaseCluster &b) { - return a.Prob > b.Prob; + return a.Prob != b.Prob ? + a.Prob > b.Prob : + a.Low->getValue().slt(b.Low->getValue()); }); // Rearrange the case blocks so that the last one falls through if possible @@ -9570,8 +9680,8 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, } // The false probability is the sum of all unhandled cases. 
- CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, I->Prob, - UnhandledProbs); + CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, + getCurSDLoc(), I->Prob, UnhandledProbs); if (CurMBB == SwitchMBB) visitSwitchCase(CB, SwitchMBB); @@ -9627,7 +9737,7 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, I++; } - for (;;) { + while (true) { // Our binary search tree differs from a typical BST in that ours can have up // to three values in each leaf. The pivot selection above doesn't take that // into account, which means the tree might require more nodes and be less @@ -9722,7 +9832,7 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, // Create the CaseBlock record that will be used to lower the branch. CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB, - LeftProb, RightProb); + getCurSDLoc(), LeftProb, RightProb); if (W.MBB == SwitchMBB) visitSwitchCase(CB, SwitchMBB); @@ -9730,6 +9840,76 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, SwitchCases.push_back(CB); } +// Scale CaseProb after peeling a case with the probablity of PeeledCaseProb +// from the swith statement. +static BranchProbability scaleCaseProbality(BranchProbability CaseProb, + BranchProbability PeeledCaseProb) { + if (PeeledCaseProb == BranchProbability::getOne()) + return BranchProbability::getZero(); + BranchProbability SwitchProb = PeeledCaseProb.getCompl(); + + uint32_t Numerator = CaseProb.getNumerator(); + uint32_t Denominator = SwitchProb.scale(CaseProb.getDenominator()); + return BranchProbability(Numerator, std::max(Numerator, Denominator)); +} + +// Try to peel the top probability case if it exceeds the threshold. +// Return current MachineBasicBlock for the switch statement if the peeling +// does not occur. +// If the peeling is performed, return the newly created MachineBasicBlock +// for the peeled switch statement. Also update Clusters to remove the peeled +// case. 
PeeledCaseProb is the BranchProbability for the peeled case. +MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster( + const SwitchInst &SI, CaseClusterVector &Clusters, + BranchProbability &PeeledCaseProb) { + MachineBasicBlock *SwitchMBB = FuncInfo.MBB; + // Don't perform if there is only one cluster or optimizing for size. + if (SwitchPeelThreshold > 100 || !FuncInfo.BPI || Clusters.size() < 2 || + TM.getOptLevel() == CodeGenOpt::None || + SwitchMBB->getParent()->getFunction().optForMinSize()) + return SwitchMBB; + + BranchProbability TopCaseProb = BranchProbability(SwitchPeelThreshold, 100); + unsigned PeeledCaseIndex = 0; + bool SwitchPeeled = false; + for (unsigned Index = 0; Index < Clusters.size(); ++Index) { + CaseCluster &CC = Clusters[Index]; + if (CC.Prob < TopCaseProb) + continue; + TopCaseProb = CC.Prob; + PeeledCaseIndex = Index; + SwitchPeeled = true; + } + if (!SwitchPeeled) + return SwitchMBB; + + DEBUG(dbgs() << "Peeled one top case in switch stmt, prob: " << TopCaseProb + << "\n"); + + // Record the MBB for the peeled switch statement. 
+ MachineFunction::iterator BBI(SwitchMBB); + ++BBI; + MachineBasicBlock *PeeledSwitchMBB = + FuncInfo.MF->CreateMachineBasicBlock(SwitchMBB->getBasicBlock()); + FuncInfo.MF->insert(BBI, PeeledSwitchMBB); + + ExportFromCurrentBlock(SI.getCondition()); + auto PeeledCaseIt = Clusters.begin() + PeeledCaseIndex; + SwitchWorkListItem W = {SwitchMBB, PeeledCaseIt, PeeledCaseIt, + nullptr, nullptr, TopCaseProb.getCompl()}; + lowerWorkItem(W, SI.getCondition(), SwitchMBB, PeeledSwitchMBB); + + Clusters.erase(PeeledCaseIt); + for (CaseCluster &CC : Clusters) { + DEBUG(dbgs() << "Scale the probablity for one cluster, before scaling: " + << CC.Prob << "\n"); + CC.Prob = scaleCaseProbality(CC.Prob, TopCaseProb); + DEBUG(dbgs() << "After scaling: " << CC.Prob << "\n"); + } + PeeledCaseProb = TopCaseProb; + return PeeledSwitchMBB; +} + void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { // Extract cases from the switch. BranchProbabilityInfo *BPI = FuncInfo.BPI; @@ -9783,9 +9963,15 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { } } + // The branch probablity of the peeled case. + BranchProbability PeeledCaseProb = BranchProbability::getZero(); + MachineBasicBlock *PeeledSwitchMBB = + peelDominantCaseCluster(SI, Clusters, PeeledCaseProb); + // If there is only the default destination, jump there directly. 
MachineBasicBlock *SwitchMBB = FuncInfo.MBB; if (Clusters.empty()) { + assert(PeeledSwitchMBB == SwitchMBB); SwitchMBB->addSuccessor(DefaultMBB); if (DefaultMBB != NextBlock(SwitchMBB)) { DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, @@ -9817,8 +10003,14 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { SwitchWorkList WorkList; CaseClusterIt First = Clusters.begin(); CaseClusterIt Last = Clusters.end() - 1; - auto DefaultProb = getEdgeProbability(SwitchMBB, DefaultMBB); - WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultProb}); + auto DefaultProb = getEdgeProbability(PeeledSwitchMBB, DefaultMBB); + // Scale the branchprobability for DefaultMBB if the peel occurs and + // DefaultMBB is not replaced. + if (PeeledCaseProb != BranchProbability::getZero() && + DefaultMBB == FuncInfo.MBBMap[SI.getDefaultDest()]) + DefaultProb = scaleCaseProbality(DefaultProb, PeeledCaseProb); + WorkList.push_back( + {PeeledSwitchMBB, First, Last, nullptr, nullptr, DefaultProb}); while (!WorkList.empty()) { SwitchWorkListItem W = WorkList.back(); @@ -9826,7 +10018,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { unsigned NumClusters = W.LastCluster - W.FirstCluster + 1; if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None && - !DefaultMBB->getParent()->getFunction()->optForMinSize()) { + !DefaultMBB->getParent()->getFunction().optForMinSize()) { // For optimized builds, lower large range as a balanced binary tree. 
splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB); continue; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index ac1d6aae65a5..9e7c2bc6821b 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -1,4 +1,4 @@ -//===-- SelectionDAGBuilder.h - Selection-DAG building --------*- C++ -*---===// +//===- SelectionDAGBuilder.h - Selection-DAG building -----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -16,67 +16,75 @@ #include "StatepointLowering.h" #include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/CallSite.h" -#include "llvm/IR/Constants.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Statepoint.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetLowering.h" +#include <algorithm> +#include <cassert> +#include <cstdint> #include <utility> #include <vector> namespace llvm { -class AddrSpaceCastInst; class AllocaInst; +class AtomicCmpXchgInst; +class AtomicRMWInst; class BasicBlock; -class BitCastInst; class BranchInst; class CallInst; +class CatchPadInst; +class CatchReturnInst; +class CatchSwitchInst; +class CleanupPadInst; +class CleanupReturnInst; +class Constant; +class ConstantInt; +class ConstrainedFPIntrinsic; class DbgValueInst; -class ExtractElementInst; -class FCmpInst; -class FPExtInst; -class FPToSIInst; -class FPToUIInst; -class FPTruncInst; -class Function; +class DataLayout; +class DIExpression; +class 
DILocalVariable; +class DILocation; +class FenceInst; class FunctionLoweringInfo; -class GetElementPtrInst; class GCFunctionInfo; -class ICmpInst; -class IntToPtrInst; +class GCRelocateInst; +class GCResultInst; class IndirectBrInst; class InvokeInst; -class InsertElementInst; -class Instruction; +class LandingPadInst; +class LLVMContext; class LoadInst; class MachineBasicBlock; -class MachineInstr; -class MachineRegisterInfo; -class MDNode; -class MVT; class PHINode; -class PtrToIntInst; +class ResumeInst; class ReturnInst; class SDDbgValue; -class SExtInst; -class SelectInst; -class ShuffleVectorInst; -class SIToFPInst; class StoreInst; class SwitchInst; -class DataLayout; class TargetLibraryInfo; -class TargetLowering; -class TruncInst; -class UIToFPInst; -class UnreachableInst; +class TargetMachine; +class Type; class VAArgInst; -class ZExtInst; +class UnreachableInst; +class Use; +class User; +class Value; //===----------------------------------------------------------------------===// /// SelectionDAGBuilder - This is the common target-independent lowering @@ -84,7 +92,7 @@ class ZExtInst; /// class SelectionDAGBuilder { /// CurInst - The current instruction being visited - const Instruction *CurInst; + const Instruction *CurInst = nullptr; DenseMap<const Value*, SDValue> NodeMap; @@ -94,13 +102,15 @@ class SelectionDAGBuilder { /// DanglingDebugInfo - Helper type for DanglingDebugInfoMap. 
class DanglingDebugInfo { - const DbgValueInst* DI; + const DbgValueInst* DI = nullptr; DebugLoc dl; - unsigned SDNodeOrder; + unsigned SDNodeOrder = 0; + public: - DanglingDebugInfo() : DI(nullptr), dl(DebugLoc()), SDNodeOrder(0) { } + DanglingDebugInfo() = default; DanglingDebugInfo(const DbgValueInst *di, DebugLoc DL, unsigned SDNO) : DI(di), dl(std::move(DL)), SDNodeOrder(SDNO) {} + const DbgValueInst* getDI() { return DI; } DebugLoc getdl() { return dl; } unsigned getSDNodeOrder() { return SDNodeOrder; } @@ -120,8 +130,8 @@ public: /// State used while lowering a statepoint sequence (gc_statepoint, /// gc_relocate, and gc_result). See StatepointLowering.hpp/cpp for details. StatepointLoweringState StatepointLowering; -private: +private: /// PendingExports - CopyToReg nodes that copy values to virtual registers /// for export to other blocks need to be emitted before any terminator /// instruction, but they have no other ordering requirements. We bunch them @@ -189,23 +199,22 @@ private: } }; - typedef std::vector<CaseCluster> CaseClusterVector; - typedef CaseClusterVector::iterator CaseClusterIt; + using CaseClusterVector = std::vector<CaseCluster>; + using CaseClusterIt = CaseClusterVector::iterator; struct CaseBits { - uint64_t Mask; - MachineBasicBlock* BB; - unsigned Bits; + uint64_t Mask = 0; + MachineBasicBlock* BB = nullptr; + unsigned Bits = 0; BranchProbability ExtraProb; + CaseBits() = default; CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits, BranchProbability Prob): - Mask(mask), BB(bb), Bits(bits), ExtraProb(Prob) { } - - CaseBits() : Mask(0), BB(nullptr), Bits(0) {} + Mask(mask), BB(bb), Bits(bits), ExtraProb(Prob) {} }; - typedef std::vector<CaseBits> CaseBitsVector; + using CaseBitsVector = std::vector<CaseBits>; /// Sort Clusters and merge adjacent cases. 
void sortAndRangeify(CaseClusterVector &Clusters); @@ -214,15 +223,6 @@ private: /// SelectionDAGBuilder and SDISel for the code generation of additional basic /// blocks needed by multi-case switch statements. struct CaseBlock { - CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs, - const Value *cmpmiddle, MachineBasicBlock *truebb, - MachineBasicBlock *falsebb, MachineBasicBlock *me, - BranchProbability trueprob = BranchProbability::getUnknown(), - BranchProbability falseprob = BranchProbability::getUnknown()) - : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs), - TrueBB(truebb), FalseBB(falsebb), ThisBB(me), TrueProb(trueprob), - FalseProb(falseprob) {} - // CC - the condition code to use for the case block's setcc node ISD::CondCode CC; @@ -237,14 +237,25 @@ private: // ThisBB - the block into which to emit the code for the setcc and branches MachineBasicBlock *ThisBB; + /// The debug location of the instruction this CaseBlock was + /// produced from. + SDLoc DL; + // TrueProb/FalseProb - branch weights. BranchProbability TrueProb, FalseProb; + + CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs, + const Value *cmpmiddle, MachineBasicBlock *truebb, + MachineBasicBlock *falsebb, MachineBasicBlock *me, + SDLoc dl, + BranchProbability trueprob = BranchProbability::getUnknown(), + BranchProbability falseprob = BranchProbability::getUnknown()) + : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs), + TrueBB(truebb), FalseBB(falsebb), ThisBB(me), DL(dl), + TrueProb(trueprob), FalseProb(falseprob) {} }; struct JumpTable { - JumpTable(unsigned R, unsigned J, MachineBasicBlock *M, - MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {} - /// Reg - the virtual register containing the index of the jump table entry //. to jump to. unsigned Reg; @@ -255,39 +266,38 @@ private: /// Default - the MBB of the default bb, which is a successor of the range /// check MBB. This is when updating PHI nodes in successors. 
MachineBasicBlock *Default; + + JumpTable(unsigned R, unsigned J, MachineBasicBlock *M, + MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {} }; struct JumpTableHeader { - JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H, - bool E = false) - : First(std::move(F)), Last(std::move(L)), SValue(SV), HeaderBB(H), - Emitted(E) {} APInt First; APInt Last; const Value *SValue; MachineBasicBlock *HeaderBB; bool Emitted; + + JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H, + bool E = false) + : First(std::move(F)), Last(std::move(L)), SValue(SV), HeaderBB(H), + Emitted(E) {} }; - typedef std::pair<JumpTableHeader, JumpTable> JumpTableBlock; + using JumpTableBlock = std::pair<JumpTableHeader, JumpTable>; struct BitTestCase { - BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr, - BranchProbability Prob): - Mask(M), ThisBB(T), TargetBB(Tr), ExtraProb(Prob) { } uint64_t Mask; MachineBasicBlock *ThisBB; MachineBasicBlock *TargetBB; BranchProbability ExtraProb; + + BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr, + BranchProbability Prob): + Mask(M), ThisBB(T), TargetBB(Tr), ExtraProb(Prob) {} }; - typedef SmallVector<BitTestCase, 3> BitTestInfo; + using BitTestInfo = SmallVector<BitTestCase, 3>; struct BitTestBlock { - BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT, - bool E, bool CR, MachineBasicBlock *P, MachineBasicBlock *D, - BitTestInfo C, BranchProbability Pr) - : First(std::move(F)), Range(std::move(R)), SValue(SV), Reg(Rg), - RegVT(RgVT), Emitted(E), ContiguousRange(CR), Parent(P), Default(D), - Cases(std::move(C)), Prob(Pr) {} APInt First; APInt Range; const Value *SValue; @@ -300,6 +310,13 @@ private: BitTestInfo Cases; BranchProbability Prob; BranchProbability DefaultProb; + + BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT, + bool E, bool CR, MachineBasicBlock *P, MachineBasicBlock *D, + BitTestInfo C, BranchProbability Pr) + : 
First(std::move(F)), Range(std::move(R)), SValue(SV), Reg(Rg), + RegVT(RgVT), Emitted(E), ContiguousRange(CR), Parent(P), Default(D), + Cases(std::move(C)), Prob(Pr) {} }; /// Return the range of value in [First..Last]. @@ -336,7 +353,7 @@ private: const ConstantInt *LT; BranchProbability DefaultProb; }; - typedef SmallVector<SwitchWorkListItem, 4> SwitchWorkList; + using SwitchWorkList = SmallVector<SwitchWorkListItem, 4>; /// Determine the rank by weight of CC in [First,Last]. If CC has more weight /// than each cluster in the range, its rank is 0. @@ -352,6 +369,10 @@ private: MachineBasicBlock *SwitchMBB, MachineBasicBlock *DefaultMBB); + /// Peel the top probability case if it exceeds the threshold + MachineBasicBlock *peelDominantCaseCluster(const SwitchInst &SI, + CaseClusterVector &Clusters, + BranchProbability &PeeledCaseProb); /// A class which encapsulates all of the information needed to generate a /// stack protector check and signals to isel via its state being initialized @@ -466,8 +487,7 @@ private: /// the same function, use the same failure basic block). class StackProtectorDescriptor { public: - StackProtectorDescriptor() - : ParentMBB(nullptr), SuccessMBB(nullptr), FailureMBB(nullptr) {} + StackProtectorDescriptor() = default; /// Returns true if all fields of the stack protector descriptor are /// initialized implying that we should/are ready to emit a stack protector. @@ -533,15 +553,15 @@ private: /// replace it with a compare/branch to the successor mbbs /// SuccessMBB/FailureMBB depending on whether or not the stack protector /// was violated. - MachineBasicBlock *ParentMBB; + MachineBasicBlock *ParentMBB = nullptr; /// A basic block visited on stack protector check success that contains the /// terminators of ParentMBB. - MachineBasicBlock *SuccessMBB; + MachineBasicBlock *SuccessMBB = nullptr; /// This basic block visited on stack protector check failure that will /// contain a call to __stack_chk_fail(). 
- MachineBasicBlock *FailureMBB; + MachineBasicBlock *FailureMBB = nullptr; /// Add a successor machine basic block to ParentMBB. If the successor mbb /// has not been created yet (i.e. if SuccMBB = 0), then the machine basic @@ -554,25 +574,29 @@ private: private: const TargetMachine &TM; + public: /// Lowest valid SDNodeOrder. The special case 0 is reserved for scheduling /// nodes without a corresponding SDNode. static const unsigned LowestSDNodeOrder = 1; SelectionDAG &DAG; - const DataLayout *DL; - AliasAnalysis *AA; + const DataLayout *DL = nullptr; + AliasAnalysis *AA = nullptr; const TargetLibraryInfo *LibInfo; /// SwitchCases - Vector of CaseBlock structures used to communicate /// SwitchInst code generation information. std::vector<CaseBlock> SwitchCases; + /// JTCases - Vector of JumpTable structures used to communicate /// SwitchInst code generation information. std::vector<JumpTableBlock> JTCases; + /// BitTestCases - Vector of BitTestBlock structures used to communicate /// SwitchInst code generation information. std::vector<BitTestBlock> BitTestCases; + /// A StackProtectorDescriptor structure used to communicate stack protector /// information in between SelectBasicBlock and FinishBasicBlock. StackProtectorDescriptor SPDescriptor; @@ -589,22 +613,19 @@ public: GCFunctionInfo *GFI; /// LPadToCallSiteMap - Map a landing pad to the call site indexes. - DenseMap<MachineBasicBlock*, SmallVector<unsigned, 4> > LPadToCallSiteMap; + DenseMap<MachineBasicBlock *, SmallVector<unsigned, 4>> LPadToCallSiteMap; /// HasTailCall - This is set to true if a call in the current /// block has been translated as a tail call. In this case, /// no subsequent DAG nodes should be created. 
- /// - bool HasTailCall; + bool HasTailCall = false; LLVMContext *Context; SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo, CodeGenOpt::Level ol) - : CurInst(nullptr), SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), - DAG(dag), DL(nullptr), AA(nullptr), FuncInfo(funcinfo), - HasTailCall(false) { - } + : SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), DAG(dag), + FuncInfo(funcinfo) {} void init(GCFunctionInfo *gfi, AliasAnalysis *AA, const TargetLibraryInfo *li); @@ -653,6 +674,7 @@ public: // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V, // generate the debug data structures now that we've seen its definition. void resolveDanglingDebugInfo(const Value *V, SDValue Val); + SDValue getValue(const Value *V); bool findValue(const Value *V) const; @@ -923,13 +945,12 @@ private: void emitInlineAsmError(ImmutableCallSite CS, const Twine &Message); - /// EmitFuncArgumentDbgValue - If V is an function argument then create - /// corresponding DBG_VALUE machine instruction for it now. At the end of - /// instruction selection, they will be inserted to the entry BB. + /// If V is an function argument then create corresponding DBG_VALUE machine + /// instruction for it now. At the end of instruction selection, they will be + /// inserted to the entry BB. bool EmitFuncArgumentDbgValue(const Value *V, DILocalVariable *Variable, DIExpression *Expr, DILocation *DL, - int64_t Offset, bool IsDbgDeclare, - const SDValue &N); + bool IsDbgDeclare, const SDValue &N); /// Return the next block after MBB, or nullptr if there is none. MachineBasicBlock *NextBlock(MachineBasicBlock *MBB); @@ -940,8 +961,8 @@ private: /// Return the appropriate SDDbgValue based on N. 
SDDbgValue *getDbgValue(SDValue N, DILocalVariable *Variable, - DIExpression *Expr, int64_t Offset, - const DebugLoc &dl, unsigned DbgSDNodeOrder); + DIExpression *Expr, const DebugLoc &dl, + unsigned DbgSDNodeOrder); }; /// RegsForValue - This struct represents the registers (physical or virtual) @@ -978,13 +999,11 @@ struct RegsForValue { /// Records if this value needs to be treated in an ABI dependant manner, /// different to normal type legalization. - bool IsABIMangled; - - RegsForValue(); + bool IsABIMangled = false; + RegsForValue() = default; RegsForValue(const SmallVector<unsigned, 4> ®s, MVT regvt, EVT valuevt, bool IsABIMangledValue = false); - RegsForValue(LLVMContext &Context, const TargetLowering &TLI, const DataLayout &DL, unsigned Reg, Type *Ty, bool IsABIMangledValue = false); @@ -1024,4 +1043,4 @@ struct RegsForValue { } // end namespace llvm -#endif +#endif // LLVM_LIB_CODEGEN_SELECTIONDAG_SELECTIONDAGBUILDER_H diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 3dd58975b1f1..dd30dc16378c 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -1,4 +1,4 @@ -//===-- SelectionDAGDumper.cpp - Implement SelectionDAG::dump() -----------===// +//===- SelectionDAGDumper.cpp - Implement SelectionDAG::dump() ------------===// // // The LLVM Compiler Infrastructure // @@ -11,24 +11,42 @@ // //===----------------------------------------------------------------------===// -#include "ScheduleDAGSDNodes.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include 
"llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Printable.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include <cstdint> +#include <iterator> + using namespace llvm; static cl::opt<bool> @@ -385,6 +403,7 @@ static Printable PrintNodeId(const SDNode &Node) { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void SDNode::dump() const { dump(nullptr); } + LLVM_DUMP_METHOD void SDNode::dump(const SelectionDAG *G) const { print(dbgs(), G); dbgs() << '\n'; @@ -402,6 +421,36 @@ void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { } void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { + if (getFlags().hasNoUnsignedWrap()) + OS << " nuw"; + + if (getFlags().hasNoSignedWrap()) + OS << " nsw"; + + if (getFlags().hasExact()) + OS << " exact"; + + if (getFlags().hasUnsafeAlgebra()) + OS << " unsafe"; + + if (getFlags().hasNoNaNs()) + OS << " nnan"; + + if (getFlags().hasNoInfs()) + OS << " ninf"; + + if (getFlags().hasNoSignedZeros()) 
+ OS << " nsz"; + + if (getFlags().hasAllowReciprocal()) + OS << " arcp"; + + if (getFlags().hasAllowContract()) + OS << " contract"; + + if (getFlags().hasVectorReduction()) + OS << " vector-reduction"; + if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) { if (!MN->memoperands_empty()) { OS << "<"; @@ -429,9 +478,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) { OS << '<' << CSDN->getAPIntValue() << '>'; } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) { - if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle()) + if (&CSDN->getValueAPF().getSemantics() == &APFloat::IEEEsingle()) OS << '<' << CSDN->getValueAPF().convertToFloat() << '>'; - else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble()) + else if (&CSDN->getValueAPF().getSemantics() == &APFloat::IEEEdouble()) OS << '<' << CSDN->getValueAPF().convertToDouble() << '>'; else { OS << "<APFloat("; @@ -479,7 +528,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << LBB->getName() << " "; OS << (const void*)BBDN->getBasicBlock() << ">"; } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) { - OS << ' ' << PrintReg(R->getReg(), + OS << ' ' << printReg(R->getReg(), G ? G->getSubtarget().getRegisterInfo() : nullptr); } else if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(this)) { @@ -640,7 +689,8 @@ static bool printOperand(raw_ostream &OS, const SelectionDAG *G, } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -typedef SmallPtrSet<const SDNode *, 32> VisitedSDNodeSet; +using VisitedSDNodeSet = SmallPtrSet<const SDNode *, 32>; + static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, const SelectionDAG *G, VisitedSDNodeSet &once) { if (!once.insert(N).second) // If we've been here before, return now. 
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index bdf57e805842..18f6997ef83c 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/SelectionDAGISel.h" #include "ScheduleDAGSDNodes.h" #include "SelectionDAGBuilder.h" #include "llvm/ADT/APInt.h" @@ -26,7 +27,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" @@ -45,9 +46,12 @@ #include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/StackProtector.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" @@ -80,13 +84,9 @@ #include "llvm/Support/KnownBits.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" -#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include <algorithm> #include <cassert> @@ -212,7 +212,7 @@ namespace llvm { IS.OptLevel = NewOptLevel; 
IS.TM.setOptLevel(NewOptLevel); DEBUG(dbgs() << "\nChanging optimization level for Function " - << IS.MF->getFunction()->getName() << "\n"); + << IS.MF->getFunction().getName() << "\n"); DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel << " ; After: -O" << NewOptLevel << "\n"); SavedFastISel = IS.TM.Options.EnableFastISel; @@ -228,7 +228,7 @@ namespace llvm { if (IS.OptLevel == SavedOptLevel) return; DEBUG(dbgs() << "\nRestoring optimization level for Function " - << IS.MF->getFunction()->getName() << "\n"); + << IS.MF->getFunction().getName() << "\n"); DEBUG(dbgs() << "\tBefore: -O" << IS.OptLevel << " ; After: -O" << SavedOptLevel << "\n"); IS.OptLevel = SavedOptLevel; @@ -384,7 +384,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { assert((!EnableFastISelAbort || TM.Options.EnableFastISel) && "-fast-isel-abort > 0 requires -fast-isel"); - const Function &Fn = *mf.getFunction(); + const Function &Fn = mf.getFunction(); MF = &mf; // Reset the target options before resetting the optimization @@ -414,7 +414,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI); - CurDAG->init(*MF, *ORE); + CurDAG->init(*MF, *ORE, this); FuncInfo->set(Fn, *MF, CurDAG); // Now get the optional analyzes if we want to. @@ -494,10 +494,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { DenseMap<unsigned, unsigned> LiveInMap; if (!FuncInfo->ArgDbgValues.empty()) - for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(), - E = RegInfo->livein_end(); LI != E; ++LI) - if (LI->second) - LiveInMap.insert(std::make_pair(LI->first, LI->second)); + for (std::pair<unsigned, unsigned> LI : RegInfo->liveins()) + if (LI.second) + LiveInMap.insert(LI); // Insert DBG_VALUE instructions for function arguments to the entry block. 
for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) { @@ -529,12 +528,14 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { const MDNode *Expr = MI->getDebugExpression(); DebugLoc DL = MI->getDebugLoc(); bool IsIndirect = MI->isIndirectDebugValue(); - unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; + if (IsIndirect) + assert(MI->getOperand(1).getImm() == 0 && + "DBG_VALUE with nonzero offset"); assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); // Def is never a terminator here, so it is ok to increment InsertPos. BuildMI(*EntryMBB, ++InsertPos, DL, TII->get(TargetOpcode::DBG_VALUE), - IsIndirect, LDI->second, Offset, Variable, Expr); + IsIndirect, LDI->second, Variable, Expr); // If this vreg is directly copied into an exported register then // that COPY instructions also need DBG_VALUE, if it is the only @@ -556,7 +557,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // declared, rather than whatever is attached to CopyUseMI. MachineInstr *NewMI = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, - CopyUseMI->getOperand(0).getReg(), Offset, Variable, Expr); + CopyUseMI->getOperand(0).getReg(), Variable, Expr); MachineBasicBlock::iterator Pos = CopyUseMI; EntryMBB->insertAfter(Pos, NewMI); } @@ -644,6 +645,9 @@ static void reportFastISelFailure(MachineFunction &MF, void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, BasicBlock::const_iterator End, bool &HadTailCall) { + // Allow creating illegal types during DAG building for the basic block. + CurDAG->NewNodesMustHaveLegalTypes = false; + // Lower the instructions. If a call is emitted as a tail call, cease emitting // nodes for this block. 
for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I) { @@ -726,8 +730,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { BlockName = (MF->getName() + ":" + FuncInfo->MBB->getBasicBlock()->getName()).str(); } - DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber - << " '" << BlockName << "'\n"; CurDAG->dump()); + DEBUG(dbgs() << "Initial selection DAG: " << printMBBReference(*FuncInfo->MBB) + << " '" << BlockName << "'\n"; + CurDAG->dump()); if (ViewDAGCombine1 && MatchFilterBB) CurDAG->viewGraph("dag-combine1 input for " + BlockName); @@ -739,8 +744,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel); } - DEBUG(dbgs() << "Optimized lowered selection DAG: BB#" << BlockNumber - << " '" << BlockName << "'\n"; CurDAG->dump()); + DEBUG(dbgs() << "Optimized lowered selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); // Second step, hack on the DAG until it only uses operations and types that // the target supports. @@ -754,8 +761,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { Changed = CurDAG->LegalizeTypes(); } - DEBUG(dbgs() << "Type-legalized selection DAG: BB#" << BlockNumber - << " '" << BlockName << "'\n"; CurDAG->dump()); + DEBUG(dbgs() << "Type-legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); // Only allow creation of legal node types. 
CurDAG->NewNodesMustHaveLegalTypes = true; @@ -771,8 +780,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(AfterLegalizeTypes, AA, OptLevel); } - DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber - << " '" << BlockName << "'\n"; CurDAG->dump()); + DEBUG(dbgs() << "Optimized type-legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); } { @@ -782,8 +793,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { } if (Changed) { - DEBUG(dbgs() << "Vector-legalized selection DAG: BB#" << BlockNumber - << " '" << BlockName << "'\n"; CurDAG->dump()); + DEBUG(dbgs() << "Vector-legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); { NamedRegionTimer T("legalize_types2", "Type Legalization 2", GroupName, @@ -791,8 +804,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->LegalizeTypes(); } - DEBUG(dbgs() << "Vector/type-legalized selection DAG: BB#" << BlockNumber - << " '" << BlockName << "'\n"; CurDAG->dump()); + DEBUG(dbgs() << "Vector/type-legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); if (ViewDAGCombineLT && MatchFilterBB) CurDAG->viewGraph("dag-combine-lv input for " + BlockName); @@ -804,8 +819,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(AfterLegalizeVectorOps, AA, OptLevel); } - DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#" - << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); + DEBUG(dbgs() << "Optimized vector-legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); } if (ViewLegalizeDAGs && MatchFilterBB) @@ -817,8 +834,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Legalize(); } - DEBUG(dbgs() << "Legalized selection DAG: BB#" << BlockNumber - << " '" << BlockName << "'\n"; CurDAG->dump()); + 
DEBUG(dbgs() << "Legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); if (ViewDAGCombine2 && MatchFilterBB) CurDAG->viewGraph("dag-combine2 input for " + BlockName); @@ -830,8 +849,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(AfterLegalizeDAG, AA, OptLevel); } - DEBUG(dbgs() << "Optimized legalized selection DAG: BB#" << BlockNumber - << " '" << BlockName << "'\n"; CurDAG->dump()); + DEBUG(dbgs() << "Optimized legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); if (OptLevel != CodeGenOpt::None) ComputeLiveOutVRegInfo(); @@ -847,8 +868,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { DoInstructionSelection(); } - DEBUG(dbgs() << "Selected selection DAG: BB#" << BlockNumber - << " '" << BlockName << "'\n"; CurDAG->dump()); + DEBUG(dbgs() << "Selected selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); if (ViewSchedDAGs && MatchFilterBB) CurDAG->viewGraph("scheduler input for " + BlockName); @@ -915,9 +938,9 @@ public: } // end anonymous namespace void SelectionDAGISel::DoInstructionSelection() { - DEBUG(dbgs() << "===== Instruction selection begins: BB#" - << FuncInfo->MBB->getNumber() - << " '" << FuncInfo->MBB->getName() << "'\n"); + DEBUG(dbgs() << "===== Instruction selection begins: " + << printMBBReference(*FuncInfo->MBB) << " '" + << FuncInfo->MBB->getName() << "'\n"); PreprocessISelDAG(); @@ -1138,7 +1161,7 @@ static void processDbgDeclares(FunctionLoweringInfo *FuncInfo) { // Look through casts and constant offset GEPs. These mostly come from // inalloca. 
- APInt Offset(DL.getPointerSizeInBits(0), 0); + APInt Offset(DL.getTypeSizeInBits(Address->getType()), 0); Address = Address->stripAndAccumulateInBoundsConstantOffsets(DL, Offset); // Check if the variable is a static alloca or a byval or inalloca @@ -1177,12 +1200,7 @@ static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) { // For each machine basic block in reverse post order. ReversePostOrderTraversal<MachineFunction *> RPOT(FuncInfo->MF); - for (ReversePostOrderTraversal<MachineFunction *>::rpo_iterator - It = RPOT.begin(), - E = RPOT.end(); - It != E; ++It) { - MachineBasicBlock *MBB = *It; - + for (MachineBasicBlock *MBB : RPOT) { // For each swifterror value in the function. for(const auto *SwiftErrorVal : FuncInfo->SwiftErrorVals) { auto Key = std::make_pair(MBB, SwiftErrorVal); @@ -1253,6 +1271,8 @@ static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) { // If we don't need a phi create a copy to the upward exposed vreg. if (!needPHI) { assert(UpwardsUse); + assert(!VRegs.empty() && + "No predecessors? 
Is the Calling Convention correct?"); unsigned DestReg = UUseVReg; BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc, TII->get(TargetOpcode::COPY), DestReg) @@ -1282,10 +1302,10 @@ static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) { } } -void preassignSwiftErrorRegs(const TargetLowering *TLI, - FunctionLoweringInfo *FuncInfo, - BasicBlock::const_iterator Begin, - BasicBlock::const_iterator End) { +static void preassignSwiftErrorRegs(const TargetLowering *TLI, + FunctionLoweringInfo *FuncInfo, + BasicBlock::const_iterator Begin, + BasicBlock::const_iterator End) { if (!TLI->supportSwiftError() || FuncInfo->SwiftErrorVals.empty()) return; @@ -2774,6 +2794,12 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table, Result = !::CheckType(Table, Index, N, SDISel.TLI, SDISel.CurDAG->getDataLayout()); return Index; + case SelectionDAGISel::OPC_CheckTypeRes: { + unsigned Res = Table[Index++]; + Result = !::CheckType(Table, Index, N.getValue(Res), SDISel.TLI, + SDISel.CurDAG->getDataLayout()); + return Index; + } case SelectionDAGISel::OPC_CheckChild0Type: case SelectionDAGISel::OPC_CheckChild1Type: case SelectionDAGISel::OPC_CheckChild2Type: @@ -2906,6 +2932,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, case ISD::CopyFromReg: case ISD::CopyToReg: case ISD::EH_LABEL: + case ISD::ANNOTATION_LABEL: case ISD::LIFETIME_START: case ISD::LIFETIME_END: NodeToMatch->setNodeId(-1); // Mark selected. 
@@ -3175,6 +3202,14 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, break; continue; + case OPC_CheckTypeRes: { + unsigned Res = MatcherTable[MatcherIndex++]; + if (!::CheckType(MatcherTable, MatcherIndex, N.getValue(Res), TLI, + CurDAG->getDataLayout())) + break; + continue; + } + case OPC_SwitchOpcode: { unsigned CurNodeOpcode = N.getOpcode(); unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart; @@ -3548,6 +3583,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, "NodeToMatch was removed partway through selection"); SelectionDAG::DAGNodeDeletedListener NDL(*CurDAG, [&](SDNode *N, SDNode *E) { + CurDAG->salvageDebugInfo(*N); auto &Chain = ChainNodesMatched; assert((!E || !is_contained(Chain, N)) && "Chain node replaced during MorphNode"); @@ -3725,6 +3761,25 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, } } +bool SelectionDAGISel::isOrEquivalentToAdd(const SDNode *N) const { + assert(N->getOpcode() == ISD::OR && "Unexpected opcode"); + auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (!C) + return false; + + // Detect when "or" is used to add an offset to a stack object. + if (auto *FN = dyn_cast<FrameIndexSDNode>(N->getOperand(0))) { + MachineFrameInfo &MFI = MF->getFrameInfo(); + unsigned A = MFI.getObjectAlignment(FN->getIndex()); + assert(isPowerOf2_32(A) && "Unexpected alignment"); + int32_t Off = C->getSExtValue(); + // If the alleged offset fits in the zero bits guaranteed by + // the alignment, then this or is really an add. 
+ return (Off >= 0) && (((A - 1) & Off) == unsigned(Off)); + } + return false; +} + void SelectionDAGISel::CannotYetSelect(SDNode *N) { std::string msg; raw_string_ostream Msg(msg); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 11561dfa5947..be4ab094bf49 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -16,15 +16,13 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; #define DEBUG_TYPE "dag-printer" diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp index 55f70f7d9fd3..3a283bc5fdc0 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp @@ -1,4 +1,4 @@ -//===-- SelectionDAGTargetInfo.cpp - SelectionDAG Info --------------------===// +//===- SelectionDAGTargetInfo.cpp - SelectionDAG Info ---------------------===// // // The LLVM Compiler Infrastructure // @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/SelectionDAGTargetInfo.h" + using namespace llvm; -SelectionDAGTargetInfo::~SelectionDAGTargetInfo() {} +SelectionDAGTargetInfo::~SelectionDAGTargetInfo() = default; diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 5d78bba86d73..3f64b49e3555 100644 --- 
a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -1,4 +1,4 @@ -//===-- StatepointLowering.cpp - SDAGBuilder's statepoint code -----------===// +//===- StatepointLowering.cpp - SDAGBuilder's statepoint code -------------===// // // The LLVM Compiler Infrastructure // @@ -14,21 +14,44 @@ #include "StatepointLowering.h" #include "SelectionDAGBuilder.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Statepoint.h" -#include "llvm/Target/TargetLowering.h" -#include <algorithm> +#include "llvm/IR/Type.h" +#include "llvm/Support/Casting.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <iterator> +#include <tuple> +#include <utility> + using namespace llvm; #define DEBUG_TYPE "statepoint-lowering" @@ -73,7 +96,7 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType, 
NumSlotsAllocatedForStatepoints++; MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo(); - unsigned SpillSize = ValueType.getSizeInBits() / 8; + unsigned SpillSize = ValueType.getStoreSize(); assert((SpillSize * 8) == ValueType.getSizeInBits() && "Size not in bytes?"); // First look for a previously created stack slot which is not in @@ -200,7 +223,6 @@ static Optional<int> findPreviousSpillSlot(const Value *Val, /// values on the stack between calls. static void reservePreviousStackSlotForValue(const Value *IncomingValue, SelectionDAGBuilder &Builder) { - SDValue Incoming = Builder.getValue(IncomingValue); if (isa<ConstantSDNode>(Incoming) || isa<FrameIndexSDNode>(Incoming)) { @@ -292,7 +314,6 @@ removeDuplicateGCPtrs(SmallVectorImpl<const Value *> &Bases, static std::pair<SDValue, SDNode *> lowerCallFromStatepointLoweringInfo( SelectionDAGBuilder::StatepointLoweringInfo &SI, SelectionDAGBuilder &Builder, SmallVectorImpl<SDValue> &PendingExports) { - SDValue ReturnValue, CallEndVal; std::tie(ReturnValue, CallEndVal) = Builder.lowerInvokable(SI.CLI, SI.EHPadBB); diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.h b/lib/CodeGen/SelectionDAG/StatepointLowering.h index b043184003a0..372c82a359f6 100644 --- a/lib/CodeGen/SelectionDAG/StatepointLowering.h +++ b/lib/CodeGen/SelectionDAG/StatepointLowering.h @@ -1,4 +1,4 @@ -//===-- StatepointLowering.h - SDAGBuilder's statepoint code -*- C++ -*---===// +//===- StatepointLowering.h - SDAGBuilder's statepoint code ---*- C++ -*---===// // // The LLVM Compiler Infrastructure // @@ -16,11 +16,16 @@ #define LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallBitVector.h" -#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/ValueTypes.h" +#include <cassert> namespace llvm { + +class CallInst; class SelectionDAGBuilder; /// This 
class tracks both per-statepoint and per-selectiondag information. @@ -30,7 +35,7 @@ class SelectionDAGBuilder; /// works in concert with information in FunctionLoweringInfo. class StatepointLoweringState { public: - StatepointLoweringState() : NextSlotToAllocate(0) {} + StatepointLoweringState() = default; /// Reset all state tracking for a newly encountered safepoint. Also /// performs some consistency checking. @@ -69,7 +74,7 @@ public: /// before the next statepoint. If we weren't expecting to see /// it, we'll report an assertion. void relocCallVisited(const CallInst &RelocCall) { - auto I = find(PendingGCRelocateCalls, &RelocCall); + auto I = llvm::find(PendingGCRelocateCalls, &RelocCall); assert(I != PendingGCRelocateCalls.end() && "Visited unexpected gcrelocate call"); PendingGCRelocateCalls.erase(I); @@ -108,11 +113,12 @@ private: SmallBitVector AllocatedStackSlots; /// Points just beyond the last slot known to have been allocated - unsigned NextSlotToAllocate; + unsigned NextSlotToAllocate = 0; /// Keep track of pending gcrelocate calls for consistency check SmallVector<const CallInst *, 10> PendingGCRelocateCalls; }; + } // end namespace llvm #endif // LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 8652df7bbd70..58276052c10b 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Target/TargetLowering.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/CallingConvLower.h" @@ -20,6 +20,9 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/TargetLoweringObjectFile.h" +#include 
"llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" @@ -29,10 +32,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <cctype> using namespace llvm; @@ -52,11 +52,11 @@ bool TargetLowering::isPositionIndependent() const { /// so, it sets Chain to the input chain of the tail call. bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const { - const Function *F = DAG.getMachineFunction().getFunction(); + const Function &F = DAG.getMachineFunction().getFunction(); // Conservatively require the attributes of the call to match those of // the return. Ignore noalias because it doesn't affect the call sequence. - AttributeList CallerAttrs = F->getAttributes(); + AttributeList CallerAttrs = F.getAttributes(); if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex) .removeAttribute(Attribute::NoAlias) .hasAttributes()) @@ -408,7 +408,7 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth, // Search for the smallest integer type with free casts to and from // Op's type. For expedience, just check power-of-2 integer types. 
const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - unsigned DemandedSize = BitWidth - Demanded.countLeadingZeros(); + unsigned DemandedSize = Demanded.getActiveBits(); unsigned SmallVTBits = DemandedSize; if (!isPowerOf2_32(SmallVTBits)) SmallVTBits = NextPowerOf2(SmallVTBits); @@ -421,9 +421,8 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth, Op.getOpcode(), dl, SmallVT, DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)), DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1))); - bool NeedZext = DemandedSize > SmallVTBits; - SDValue Z = DAG.getNode(NeedZext ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, - dl, Op.getValueType(), X); + assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?"); + SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X); return TLO.CombineTo(Op, Z); } } @@ -459,7 +458,7 @@ TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx, // If Old has more than one use then it must be Op, because the // AssumeSingleUse flag is not propogated to recursive calls of // SimplifyDemanded bits, so the only node with multiple use that - // it will attempt to combine will be opt. + // it will attempt to combine will be Op. assert(TLO.Old == Op); SmallVector <SDValue, 4> NewOps; @@ -470,7 +469,7 @@ TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx, } NewOps.push_back(User->getOperand(i)); } - TLO.DAG.UpdateNodeOperands(User, NewOps); + User = TLO.DAG.UpdateNodeOperands(User, NewOps); // Op has less users now, so we may be able to perform additional combines // with it. 
DCI.AddToWorklist(Op.getNode()); @@ -480,7 +479,7 @@ TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx, return true; } -bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt &DemandedMask, +bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -517,6 +516,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // Don't know anything. Known = KnownBits(BitWidth); + if (Op.getOpcode() == ISD::Constant) { + // We know all of the bits for a constant! + Known.One = cast<ConstantSDNode>(Op)->getAPIntValue(); + Known.Zero = ~Known.One; + return false; + } + // Other users may use these bits. if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) { if (Depth != 0) { @@ -539,11 +545,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, KnownBits Known2, KnownOut; switch (Op.getOpcode()) { - case ISD::Constant: - // We know all of the bits for a constant! - Known.One = cast<ConstantSDNode>(Op)->getAPIntValue(); - Known.Zero = ~Known.One; - return false; // Don't fall through, will infinitely loop. case ISD::BUILD_VECTOR: // Collect the known bits that are shared by every constant vector element. Known.Zero.setAllBits(); Known.One.setAllBits(); @@ -780,33 +781,38 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, break; } case ISD::SHL: - if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { - unsigned ShAmt = SA->getZExtValue(); + if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) { SDValue InOp = Op.getOperand(0); // If the shift count is an invalid immediate, don't do anything. - if (ShAmt >= BitWidth) + if (SA->getAPIntValue().uge(BitWidth)) break; + unsigned ShAmt = SA->getZExtValue(); + // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a // single shift. We can do this if the bottom bits (which are shifted // out) are never demanded. 
- if (InOp.getOpcode() == ISD::SRL && - isa<ConstantSDNode>(InOp.getOperand(1))) { - if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) { - unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue(); - unsigned Opc = ISD::SHL; - int Diff = ShAmt-C1; - if (Diff < 0) { - Diff = -Diff; - Opc = ISD::SRL; - } + if (InOp.getOpcode() == ISD::SRL) { + if (ConstantSDNode *SA2 = isConstOrConstSplat(InOp.getOperand(1))) { + if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) { + if (SA2->getAPIntValue().ult(BitWidth)) { + unsigned C1 = SA2->getZExtValue(); + unsigned Opc = ISD::SHL; + int Diff = ShAmt-C1; + if (Diff < 0) { + Diff = -Diff; + Opc = ISD::SRL; + } - SDValue NewSA = - TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType()); - EVT VT = Op.getValueType(); - return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, - InOp.getOperand(0), NewSA)); + SDValue NewSA = + TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType()); + EVT VT = Op.getValueType(); + return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, + InOp.getOperand(0), + NewSA)); + } + } } } @@ -818,7 +824,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) { SDValue InnerOp = InOp.getOperand(0); EVT InnerVT = InnerOp.getValueType(); - unsigned InnerBits = InnerVT.getSizeInBits(); + unsigned InnerBits = InnerVT.getScalarSizeInBits(); if (ShAmt < InnerBits && NewMask.getActiveBits() <= InnerBits && isTypeDesirableForOp(ISD::SHL, InnerVT)) { EVT ShTy = getShiftAmountTy(InnerVT, DL); @@ -837,45 +843,42 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // (shl (anyext x), c2-c1). This requires that the bottom c1 bits // aren't demanded (as above) and that the shifted upper c1 bits of // x aren't demanded. 
- if (InOp.hasOneUse() && - InnerOp.getOpcode() == ISD::SRL && - InnerOp.hasOneUse() && - isa<ConstantSDNode>(InnerOp.getOperand(1))) { - unsigned InnerShAmt = cast<ConstantSDNode>(InnerOp.getOperand(1)) - ->getZExtValue(); - if (InnerShAmt < ShAmt && - InnerShAmt < InnerBits && - NewMask.getActiveBits() <= (InnerBits - InnerShAmt + ShAmt) && - NewMask.countTrailingZeros() >= ShAmt) { - SDValue NewSA = - TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, - Op.getOperand(1).getValueType()); - EVT VT = Op.getValueType(); - SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, - InnerOp.getOperand(0)); - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, - NewExt, NewSA)); + if (InOp.hasOneUse() && InnerOp.getOpcode() == ISD::SRL && + InnerOp.hasOneUse()) { + if (ConstantSDNode *SA2 = isConstOrConstSplat(InnerOp.getOperand(1))) { + unsigned InnerShAmt = SA2->getLimitedValue(InnerBits); + if (InnerShAmt < ShAmt && + InnerShAmt < InnerBits && + NewMask.getActiveBits() <= (InnerBits - InnerShAmt + ShAmt) && + NewMask.countTrailingZeros() >= ShAmt) { + SDValue NewSA = + TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, + Op.getOperand(1).getValueType()); + EVT VT = Op.getValueType(); + SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, + InnerOp.getOperand(0)); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, + NewExt, NewSA)); + } } } } - Known.Zero <<= SA->getZExtValue(); - Known.One <<= SA->getZExtValue(); + Known.Zero <<= ShAmt; + Known.One <<= ShAmt; // low bits known zero. - Known.Zero.setLowBits(SA->getZExtValue()); + Known.Zero.setLowBits(ShAmt); } break; case ISD::SRL: - if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { - EVT VT = Op.getValueType(); - unsigned ShAmt = SA->getZExtValue(); - unsigned VTSize = VT.getSizeInBits(); + if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) { SDValue InOp = Op.getOperand(0); // If the shift count is an invalid immediate, don't do anything. 
- if (ShAmt >= BitWidth) + if (SA->getAPIntValue().uge(BitWidth)) break; + unsigned ShAmt = SA->getZExtValue(); APInt InDemandedMask = (NewMask << ShAmt); // If the shift is exact, then it does demand the low bits (and knows that @@ -886,21 +889,27 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a // single shift. We can do this if the top bits (which are shifted out) // are never demanded. - if (InOp.getOpcode() == ISD::SHL && - isa<ConstantSDNode>(InOp.getOperand(1))) { - if (ShAmt && (NewMask & APInt::getHighBitsSet(VTSize, ShAmt)) == 0) { - unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue(); - unsigned Opc = ISD::SRL; - int Diff = ShAmt-C1; - if (Diff < 0) { - Diff = -Diff; - Opc = ISD::SHL; - } + if (InOp.getOpcode() == ISD::SHL) { + if (ConstantSDNode *SA2 = isConstOrConstSplat(InOp.getOperand(1))) { + if (ShAmt && + (NewMask & APInt::getHighBitsSet(BitWidth, ShAmt)) == 0) { + if (SA2->getAPIntValue().ult(BitWidth)) { + unsigned C1 = SA2->getZExtValue(); + unsigned Opc = ISD::SRL; + int Diff = ShAmt-C1; + if (Diff < 0) { + Diff = -Diff; + Opc = ISD::SHL; + } - SDValue NewSA = - TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType()); - return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, - InOp.getOperand(0), NewSA)); + SDValue NewSA = + TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType()); + EVT VT = Op.getValueType(); + return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, + InOp.getOperand(0), + NewSA)); + } + } } } @@ -924,14 +933,14 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op.getOperand(0), Op.getOperand(1))); - if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) { EVT VT = Op.getValueType(); - unsigned ShAmt = SA->getZExtValue(); // If the shift count is an invalid immediate, don't do 
anything. - if (ShAmt >= BitWidth) + if (SA->getAPIntValue().uge(BitWidth)) break; + unsigned ShAmt = SA->getZExtValue(); APInt InDemandedMask = (NewMask << ShAmt); // If the shift is exact, then it does demand the low bits (and knows that @@ -979,15 +988,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, break; case ISD::SIGN_EXTEND_INREG: { EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + unsigned ExVTBits = ExVT.getScalarSizeInBits(); - APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1); // If we only care about the highest bit, don't bother shifting right. - if (MsbMask == NewMask) { - unsigned ShAmt = ExVT.getScalarSizeInBits(); + if (NewMask.isSignMask()) { SDValue InOp = Op.getOperand(0); - unsigned VTBits = Op->getValueType(0).getScalarSizeInBits(); bool AlreadySignExtended = - TLO.DAG.ComputeNumSignBits(InOp) >= VTBits-ShAmt+1; + TLO.DAG.ComputeNumSignBits(InOp) >= BitWidth-ExVTBits+1; // However if the input is already sign extended we expect the sign // extension to be dropped altogether later and do not simplify. if (!AlreadySignExtended) { @@ -997,7 +1004,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (TLO.LegalTypes() && !ShiftAmtTy.isVector()) ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL); - SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, dl, + SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ExVTBits, dl, ShiftAmtTy); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, Op.getValueType(), InOp, @@ -1005,26 +1012,15 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } } - // Sign extension. Compute the demanded bits in the result that are not - // present in the input. - APInt NewBits = - APInt::getHighBitsSet(BitWidth, - BitWidth - ExVT.getScalarSizeInBits()); - // If none of the extended bits are demanded, eliminate the sextinreg. 
- if ((NewBits & NewMask) == 0) + if (NewMask.getActiveBits() <= ExVTBits) return TLO.CombineTo(Op, Op.getOperand(0)); - APInt InSignBit = - APInt::getSignMask(ExVT.getScalarSizeInBits()).zext(BitWidth); - APInt InputDemandedBits = - APInt::getLowBitsSet(BitWidth, - ExVT.getScalarSizeInBits()) & - NewMask; + APInt InputDemandedBits = NewMask.getLoBits(ExVTBits); // Since the sign extended bits are demanded, we know that the sign // bit is demanded. - InputDemandedBits |= InSignBit; + InputDemandedBits.setBit(ExVTBits - 1); if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits, Known, TLO, Depth+1)) @@ -1035,16 +1031,17 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // top bits of the result. // If the input sign bit is known zero, convert this into a zero extension. - if (Known.Zero.intersects(InSignBit)) + if (Known.Zero[ExVTBits - 1]) return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg( Op.getOperand(0), dl, ExVT.getScalarType())); - if (Known.One.intersects(InSignBit)) { // Input sign bit known set - Known.One |= NewBits; - Known.Zero &= ~NewBits; + APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits); + if (Known.One[ExVTBits - 1]) { // Input sign bit known set + Known.One.setBitsFrom(ExVTBits); + Known.Zero &= Mask; } else { // Input sign bit unknown - Known.Zero &= ~NewBits; - Known.One &= ~NewBits; + Known.Zero &= Mask; + Known.One &= Mask; } break; } @@ -1072,61 +1069,47 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } case ISD::ZERO_EXTEND: { unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits(); - APInt InMask = NewMask.trunc(OperandBitWidth); // If none of the top bits are demanded, convert this into an any_extend. 
- APInt NewBits = - APInt::getHighBitsSet(BitWidth, BitWidth - OperandBitWidth) & NewMask; - if (!NewBits.intersects(NewMask)) + if (NewMask.getActiveBits() <= OperandBitWidth) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), Op.getOperand(0))); + APInt InMask = NewMask.trunc(OperandBitWidth); if (SimplifyDemandedBits(Op.getOperand(0), InMask, Known, TLO, Depth+1)) return true; assert(!Known.hasConflict() && "Bits known to be one AND zero?"); Known = Known.zext(BitWidth); - Known.Zero |= NewBits; + Known.Zero.setBitsFrom(OperandBitWidth); break; } case ISD::SIGN_EXTEND: { - EVT InVT = Op.getOperand(0).getValueType(); - unsigned InBits = InVT.getScalarSizeInBits(); - APInt InMask = APInt::getLowBitsSet(BitWidth, InBits); - APInt InSignBit = APInt::getOneBitSet(BitWidth, InBits - 1); - APInt NewBits = ~InMask & NewMask; + unsigned InBits = Op.getOperand(0).getValueType().getScalarSizeInBits(); // If none of the top bits are demanded, convert this into an any_extend. - if (NewBits == 0) + if (NewMask.getActiveBits() <= InBits) return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), Op.getOperand(0))); // Since some of the sign extended bits are demanded, we know that the sign // bit is demanded. - APInt InDemandedBits = InMask & NewMask; - InDemandedBits |= InSignBit; - InDemandedBits = InDemandedBits.trunc(InBits); + APInt InDemandedBits = NewMask.trunc(InBits); + InDemandedBits.setBit(InBits - 1); if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, Known, TLO, Depth+1)) return true; - Known = Known.zext(BitWidth); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + // If the sign bit is known one, the top bits match. + Known = Known.sext(BitWidth); // If the sign bit is known zero, convert this to a zero extend. 
- if (Known.Zero.intersects(InSignBit)) + if (Known.isNonNegative()) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), Op.getOperand(0))); - - // If the sign bit is known one, the top bits match. - if (Known.One.intersects(InSignBit)) { - Known.One |= NewBits; - assert((Known.Zero & NewBits) == 0); - } else { // Otherwise, top bits aren't known. - assert((Known.One & NewBits) == 0); - assert((Known.Zero & NewBits) == 0); - } break; } case ISD::ANY_EXTEND: { @@ -1305,6 +1288,19 @@ void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, Known.resetAll(); } +void TargetLowering::computeKnownBitsForFrameIndex(const SDValue Op, + KnownBits &Known, + const APInt &DemandedElts, + const SelectionDAG &DAG, + unsigned Depth) const { + assert(isa<FrameIndexSDNode>(Op) && "expected FrameIndex"); + + if (unsigned Align = DAG.InferPtrAlignment(Op)) { + // The low bits are known zero if the pointer is aligned. + Known.Zero.setLowBits(Log2_32(Align)); + } +} + /// This method can be implemented by targets that want to expose additional /// information about sign bits to the DAG Combiner. unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, @@ -2967,7 +2963,7 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d, SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, std::vector<SDNode *> *Created) const { - AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes(); + AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (TLI.isIntDivCheap(N->getValueType(0), Attr)) return SDValue(N,0); // Lower SDIV as SDIV @@ -3436,8 +3432,6 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST, // The type of data as saved in memory. 
EVT MemSclVT = StVT.getScalarType(); - EVT PtrVT = BasePtr.getValueType(); - // Store Stride in bytes unsigned Stride = MemSclVT.getSizeInBits() / 8; EVT IdxVT = getVectorIdxTy(DAG.getDataLayout()); @@ -3450,8 +3444,7 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST, SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value, DAG.getConstant(Idx, SL, IdxVT)); - SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr, - DAG.getConstant(Idx * Stride, SL, PtrVT)); + SDValue Ptr = DAG.getObjectPtrOffset(SL, BasePtr, Idx * Stride); // This scalar TruncStore may be illegal, but we legalize it later. SDValue Store = DAG.getTruncStore( @@ -3474,6 +3467,8 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { EVT VT = LD->getValueType(0); EVT LoadedVT = LD->getMemoryVT(); SDLoc dl(LD); + auto &MF = DAG.getMachineFunction(); + if (VT.isFloatingPoint() || VT.isVector()) { EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits()); if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) { @@ -3498,13 +3493,13 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { // Copy the value to a (aligned) stack slot using (unaligned) integer // loads and stores, then do a (aligned) load from the stack slot. MVT RegVT = getRegisterType(*DAG.getContext(), intVT); - unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8; + unsigned LoadedBytes = LoadedVT.getStoreSize(); unsigned RegBytes = RegVT.getSizeInBits() / 8; unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes; // Make sure the stack slot is also aligned for the register type. 
SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT); - + auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex(); SmallVector<SDValue, 8> Stores; SDValue StackPtr = StackBase; unsigned Offset = 0; @@ -3523,13 +3518,14 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { MinAlign(LD->getAlignment(), Offset), LD->getMemOperand()->getFlags(), LD->getAAInfo()); // Follow the load with a store to the stack slot. Remember the store. - Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, - MachinePointerInfo())); + Stores.push_back(DAG.getStore( + Load.getValue(1), dl, Load, StackPtr, + MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset))); // Increment the pointers. Offset += RegBytes; - Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, PtrIncrement); - StackPtr = DAG.getNode(ISD::ADD, dl, StackPtrVT, StackPtr, - StackPtrIncrement); + + Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement); + StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement); } // The last copy may be partial. Do an extending load. @@ -3543,15 +3539,17 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { // Follow the load with a store to the stack slot. Remember the store. // On big-endian machines this requires a truncating store to ensure // that the bits end up in the right place. - Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr, - MachinePointerInfo(), MemVT)); + Stores.push_back(DAG.getTruncStore( + Load.getValue(1), dl, Load, StackPtr, + MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT)); // The order of the stores doesn't matter - say it with a TokenFactor. SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); // Finally, perform the original load only redirected to the stack slot. 
Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase, - MachinePointerInfo(), LoadedVT); + MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), + LoadedVT); // Callers expect a MERGE_VALUES node. return std::make_pair(Load, TF); @@ -3581,8 +3579,8 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(), NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(), LD->getAAInfo()); - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + + Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), NewLoadedVT, MinAlign(Alignment, IncrementSize), @@ -3591,8 +3589,8 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(), NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(), LD->getAAInfo()); - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + + Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), NewLoadedVT, MinAlign(Alignment, IncrementSize), @@ -3621,6 +3619,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, SDValue Val = ST->getValue(); EVT VT = Val.getValueType(); int Alignment = ST->getAlignment(); + auto &MF = DAG.getMachineFunction(); SDLoc dl(ST); if (ST->getMemoryVT().isFloatingPoint() || @@ -3649,16 +3648,18 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, EVT::getIntegerVT(*DAG.getContext(), StoredVT.getSizeInBits())); EVT PtrVT = Ptr.getValueType(); - unsigned StoredBytes = StoredVT.getSizeInBits() / 8; + unsigned StoredBytes = StoredVT.getStoreSize(); unsigned RegBytes = 
RegVT.getSizeInBits() / 8; unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes; // Make sure the stack slot is also aligned for the register type. SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT); + auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); // Perform the original store, only redirected to the stack slot. - SDValue Store = DAG.getTruncStore(Chain, dl, Val, StackPtr, - MachinePointerInfo(), StoredVT); + SDValue Store = DAG.getTruncStore( + Chain, dl, Val, StackPtr, + MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoredVT); EVT StackPtrVT = StackPtr.getValueType(); @@ -3670,8 +3671,9 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, // Do all but one copies using the full register width. for (unsigned i = 1; i < NumRegs; i++) { // Load one integer register's worth from the stack slot. - SDValue Load = - DAG.getLoad(RegVT, dl, Store, StackPtr, MachinePointerInfo()); + SDValue Load = DAG.getLoad( + RegVT, dl, Store, StackPtr, + MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)); // Store it to the final location. Remember the store. Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr, ST->getPointerInfo().getWithOffset(Offset), @@ -3679,9 +3681,8 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, ST->getMemOperand()->getFlags())); // Increment the pointers. Offset += RegBytes; - StackPtr = DAG.getNode(ISD::ADD, dl, StackPtrVT, - StackPtr, StackPtrIncrement); - Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, PtrIncrement); + StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement); + Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement); } // The last store may be partial. Do a truncating store. On big-endian @@ -3691,8 +3692,9 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, 8 * (StoredBytes - Offset)); // Load from the stack slot. 
- SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr, - MachinePointerInfo(), MemVT); + SDValue Load = DAG.getExtLoad( + ISD::EXTLOAD, dl, RegVT, Store, StackPtr, + MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT); Stores.push_back( DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr, @@ -3726,9 +3728,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, Ptr, ST->getPointerInfo(), NewStoredVT, Alignment, ST->getMemOperand()->getFlags()); - EVT PtrVT = Ptr.getValueType(); - Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, - DAG.getConstant(IncrementSize, dl, PtrVT)); + Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); Alignment = MinAlign(Alignment, IncrementSize); Store2 = DAG.getTruncStore( Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr, @@ -3767,7 +3767,7 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask, AddrVT); Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale); } else - Increment = DAG.getConstant(DataVT.getSizeInBits() / 8, DL, AddrVT); + Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT); return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment); } @@ -3797,7 +3797,7 @@ SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG, SDValue Index) const { SDLoc dl(Index); // Make sure the index type is big enough to compute in. 
- Index = DAG.getZExtOrTrunc(Index, dl, getPointerTy(DAG.getDataLayout())); + Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType()); EVT EltVT = VecVT.getVectorElementType(); diff --git a/lib/CodeGen/ShadowStackGCLowering.cpp b/lib/CodeGen/ShadowStackGCLowering.cpp index 7b60d22c7ace..25d405bf63de 100644 --- a/lib/CodeGen/ShadowStackGCLowering.cpp +++ b/lib/CodeGen/ShadowStackGCLowering.cpp @@ -1,4 +1,4 @@ -//===-- ShadowStackGCLowering.cpp - Custom lowering for shadow-stack gc ---===// +//===- ShadowStackGCLowering.cpp - Custom lowering for shadow-stack gc ----===// // // The LLVM Compiler Infrastructure // @@ -16,14 +16,31 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/IR/CallSite.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Transforms/Utils/EscapeEnumerator.h" +#include <cassert> +#include <cstddef> +#include <string> +#include <utility> +#include <vector> using namespace llvm; @@ -34,12 +51,11 @@ namespace { class ShadowStackGCLowering : public FunctionPass { /// RootChain - This is the global linked-list that contains the chain of GC /// roots. - GlobalVariable *Head; + GlobalVariable *Head = nullptr; /// StackEntryTy - Abstract type of a link in the shadow stack. 
- /// - StructType *StackEntryTy; - StructType *FrameMapTy; + StructType *StackEntryTy = nullptr; + StructType *FrameMapTy = nullptr; /// Roots - GC roots in the current function. Each is a pair of the /// intrinsic call and its corresponding alloca. @@ -47,6 +63,7 @@ class ShadowStackGCLowering : public FunctionPass { public: static char ID; + ShadowStackGCLowering(); bool doInitialization(Module &M) override; @@ -57,6 +74,7 @@ private: Constant *GetFrameMap(Function &F); Type *GetConcreteStackEntryType(Function &F); void CollectRoots(Function &F); + static GetElementPtrInst *CreateGEP(LLVMContext &Context, IRBuilder<> &B, Type *Ty, Value *BasePtr, int Idx1, const char *Name); @@ -64,7 +82,10 @@ private: Type *Ty, Value *BasePtr, int Idx1, int Idx2, const char *Name); }; -} + +} // end anonymous namespace + +char ShadowStackGCLowering::ID = 0; INITIALIZE_PASS_BEGIN(ShadowStackGCLowering, DEBUG_TYPE, "Shadow Stack GC Lowering", false, false) @@ -74,11 +95,7 @@ INITIALIZE_PASS_END(ShadowStackGCLowering, DEBUG_TYPE, FunctionPass *llvm::createShadowStackGCLoweringPass() { return new ShadowStackGCLowering(); } -char ShadowStackGCLowering::ID = 0; - -ShadowStackGCLowering::ShadowStackGCLowering() - : FunctionPass(ID), Head(nullptr), StackEntryTy(nullptr), - FrameMapTy(nullptr) { +ShadowStackGCLowering::ShadowStackGCLowering() : FunctionPass(ID) { initializeShadowStackGCLoweringPass(*PassRegistry::getPassRegistry()); } diff --git a/lib/CodeGen/ShrinkWrap.cpp b/lib/CodeGen/ShrinkWrap.cpp index aa75f5e2caa2..b35bf6ba3a7b 100644 --- a/lib/CodeGen/ShrinkWrap.cpp +++ b/lib/CodeGen/ShrinkWrap.cpp @@ -1,4 +1,4 @@ -//===-- ShrinkWrap.cpp - Compute safe point for prolog/epilog insertion ---===// +//===- ShrinkWrap.cpp - Compute safe point for prolog/epilog insertion ----===// // // The LLVM Compiler Infrastructure // @@ -45,50 +45,58 @@ // // If this pass found points matching all these properties, then // MachineFrameInfo is updated with this information. 
+// //===----------------------------------------------------------------------===// + #include "llvm/ADT/BitVector.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -// To check for profitability. +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" -// For property #1 for Save. #include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -// To record the result of the analysis. #include "llvm/CodeGen/MachineFrameInfo.h" -// For property #2. +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" -// For property #1 for Restore. +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachinePostDominators.h" -#include "llvm/CodeGen/Passes.h" -// To know about callee-saved. #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Function.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -// To query the target about frame lowering. -#include "llvm/Target/TargetFrameLowering.h" -// To know about frame setup operation. -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" -// To access TargetInstrInfo. 
-#include "llvm/Target/TargetSubtargetInfo.h" - -#define DEBUG_TYPE "shrink-wrap" +#include <cassert> +#include <cstdint> +#include <memory> using namespace llvm; +#define DEBUG_TYPE "shrink-wrap" + STATISTIC(NumFunc, "Number of functions"); STATISTIC(NumCandidates, "Number of shrink-wrapping candidates"); STATISTIC(NumCandidatesDropped, "Number of shrink-wrapping candidates dropped because of frequency"); static cl::opt<cl::boolOrDefault> - EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden, - cl::desc("enable the shrink-wrapping pass")); +EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden, + cl::desc("enable the shrink-wrapping pass")); namespace { + /// \brief Class to determine where the safe point to insert the /// prologue and epilogue are. /// Unlike the paper from Fred C. Chow, PLDI'88, that introduces the @@ -101,31 +109,42 @@ class ShrinkWrap : public MachineFunctionPass { RegisterClassInfo RCI; MachineDominatorTree *MDT; MachinePostDominatorTree *MPDT; + /// Current safe point found for the prologue. /// The prologue will be inserted before the first instruction /// in this basic block. MachineBasicBlock *Save; + /// Current safe point found for the epilogue. /// The epilogue will be inserted before the first terminator instruction /// in this basic block. MachineBasicBlock *Restore; + /// Hold the information of the basic block frequency. /// Use to check the profitability of the new points. MachineBlockFrequencyInfo *MBFI; + /// Hold the loop information. Used to determine if Save and Restore /// are in the same loop. MachineLoopInfo *MLI; + /// Frequency of the Entry block. uint64_t EntryFreq; + /// Current opcode for frame setup. unsigned FrameSetupOpcode; + /// Current opcode for frame destroy. unsigned FrameDestroyOpcode; + /// Entry block. const MachineBasicBlock *Entry; - typedef SmallSetVector<unsigned, 16> SetOfRegs; + + using SetOfRegs = SmallSetVector<unsigned, 16>; + /// Registers that need to be saved for the current function. 
mutable SetOfRegs CurrentCSRs; + /// Current MachineFunction. MachineFunction *MachineFunc; @@ -205,9 +224,11 @@ public: /// the MachineFrameInfo attached to \p MF with the results. bool runOnMachineFunction(MachineFunction &MF) override; }; -} // End anonymous namespace. + +} // end anonymous namespace char ShrinkWrap::ID = 0; + char &llvm::ShrinkWrapID = ShrinkWrap::ID; INITIALIZE_PASS_BEGIN(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false) @@ -219,6 +240,10 @@ INITIALIZE_PASS_END(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false) bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS) const { + // Ignore DBG_VALUE and other meta instructions that must not affect codegen. + if (MI.isMetaInstruction()) + return false; + if (MI.getOpcode() == FrameSetupOpcode || MI.getOpcode() == FrameDestroyOpcode) { DEBUG(dbgs() << "Frame instruction: " << MI << '\n'); @@ -424,7 +449,7 @@ static bool isIrreducibleCFG(const MachineFunction &MF, } bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF)) + if (skipFunction(MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF)) return false; DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n'); @@ -537,16 +562,17 @@ bool ShrinkWrap::isShrinkWrapEnabled(const MachineFunction &MF) { switch (EnableShrinkWrapOpt) { case cl::BOU_UNSET: return TFI->enableShrinkWrapping(MF) && - // Windows with CFI has some limitations that make it impossible - // to use shrink-wrapping. - !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && - // Sanitizers look at the value of the stack at the location - // of the crash. Since a crash can happen anywhere, the - // frame must be lowered before anything else happen for the - // sanitizers to be able to get a correct stack frame. 
- !(MF.getFunction()->hasFnAttribute(Attribute::SanitizeAddress) || - MF.getFunction()->hasFnAttribute(Attribute::SanitizeThread) || - MF.getFunction()->hasFnAttribute(Attribute::SanitizeMemory)); + // Windows with CFI has some limitations that make it impossible + // to use shrink-wrapping. + !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && + // Sanitizers look at the value of the stack at the location + // of the crash. Since a crash can happen anywhere, the + // frame must be lowered before anything else happen for the + // sanitizers to be able to get a correct stack frame. + !(MF.getFunction().hasFnAttribute(Attribute::SanitizeAddress) || + MF.getFunction().hasFnAttribute(Attribute::SanitizeThread) || + MF.getFunction().hasFnAttribute(Attribute::SanitizeMemory) || + MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress)); // If EnableShrinkWrap is set, it takes precedence on whatever the // target sets. The rational is that we assume we want to test // something related to shrink-wrapping. diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index 3656832a7f1a..ea74c777e1e2 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -12,7 +12,6 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" using namespace llvm; @@ -264,7 +263,7 @@ LLVM_DUMP_METHOD void SlotIndexes::dump() const { } for (unsigned i = 0, e = MBBRanges.size(); i != e; ++i) - dbgs() << "BB#" << i << "\t[" << MBBRanges[i].first << ';' + dbgs() << "%bb." 
<< i << "\t[" << MBBRanges[i].first << ';' << MBBRanges[i].second << ")\n"; } #endif diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp index 0abe1c47da55..b989b54d4190 100644 --- a/lib/CodeGen/SpillPlacement.cpp +++ b/lib/CodeGen/SpillPlacement.cpp @@ -1,4 +1,4 @@ -//===-- SpillPlacement.cpp - Optimal Spill Code Placement -----------------===// +//===- SpillPlacement.cpp - Optimal Spill Code Placement ------------------===// // // The LLVM Compiler Infrastructure // @@ -28,21 +28,31 @@ //===----------------------------------------------------------------------===// #include "SpillPlacement.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SparseSet.h" #include "llvm/CodeGen/EdgeBundles.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ManagedStatic.h" +#include "llvm/Pass.h" +#include "llvm/Support/BlockFrequency.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <utility> using namespace llvm; #define DEBUG_TYPE "spill-code-placement" char SpillPlacement::ID = 0; + +char &llvm::SpillPlacementID = SpillPlacement::ID; + INITIALIZE_PASS_BEGIN(SpillPlacement, DEBUG_TYPE, "Spill Code Placement Analysis", true, true) INITIALIZE_PASS_DEPENDENCY(EdgeBundles) @@ -50,8 +60,6 @@ INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_END(SpillPlacement, DEBUG_TYPE, "Spill Code Placement Analysis", true, true) -char &llvm::SpillPlacementID = SpillPlacement::ID; - void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired<MachineBlockFrequencyInfo>(); @@ -68,10 +76,10 @@ void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const { /// The node Value is positive when the variable should 
be in a register. The /// value can change when linked nodes change, but convergence is very fast /// because all weights are positive. -/// struct SpillPlacement::Node { /// BiasN - Sum of blocks that prefer a spill. BlockFrequency BiasN; + /// BiasP - Sum of blocks that prefer a register. BlockFrequency BiasP; @@ -80,7 +88,7 @@ struct SpillPlacement::Node { /// variable should go in a register through this bundle. int Value; - typedef SmallVector<std::pair<BlockFrequency, unsigned>, 4> LinkVector; + using LinkVector = SmallVector<std::pair<BlockFrequency, unsigned>, 4>; /// Links - (Weight, BundleNo) for all transparent blocks connecting to other /// bundles. The weights are all positive block frequencies. @@ -104,7 +112,7 @@ struct SpillPlacement::Node { } /// clear - Reset per-query data, but preserve frequencies that only depend on - // the CFG. + /// the CFG. void clear(const BlockFrequency &Threshold) { BiasN = BiasP = Value = 0; SumLinkWeights = Threshold; @@ -260,14 +268,14 @@ void SpillPlacement::addConstraints(ArrayRef<BlockConstraint> LiveBlocks) { // Live-in to block? if (I->Entry != DontCare) { - unsigned ib = bundles->getBundle(I->Number, 0); + unsigned ib = bundles->getBundle(I->Number, false); activate(ib); nodes[ib].addBias(Freq, I->Entry); } // Live-out from block? 
if (I->Exit != DontCare) { - unsigned ob = bundles->getBundle(I->Number, 1); + unsigned ob = bundles->getBundle(I->Number, true); activate(ob); nodes[ob].addBias(Freq, I->Exit); } @@ -281,8 +289,8 @@ void SpillPlacement::addPrefSpill(ArrayRef<unsigned> Blocks, bool Strong) { BlockFrequency Freq = BlockFrequencies[*I]; if (Strong) Freq += Freq; - unsigned ib = bundles->getBundle(*I, 0); - unsigned ob = bundles->getBundle(*I, 1); + unsigned ib = bundles->getBundle(*I, false); + unsigned ob = bundles->getBundle(*I, true); activate(ib); activate(ob); nodes[ib].addBias(Freq, PrefSpill); @@ -294,8 +302,8 @@ void SpillPlacement::addLinks(ArrayRef<unsigned> Links) { for (ArrayRef<unsigned>::iterator I = Links.begin(), E = Links.end(); I != E; ++I) { unsigned Number = *I; - unsigned ib = bundles->getBundle(Number, 0); - unsigned ob = bundles->getBundle(Number, 1); + unsigned ib = bundles->getBundle(Number, false); + unsigned ob = bundles->getBundle(Number, true); // Ignore self-loops. if (ib == ob) diff --git a/lib/CodeGen/SpillPlacement.h b/lib/CodeGen/SpillPlacement.h index 9b9ecccf9049..aa3ac444e0da 100644 --- a/lib/CodeGen/SpillPlacement.h +++ b/lib/CodeGen/SpillPlacement.h @@ -1,4 +1,4 @@ -//===-- SpillPlacement.h - Optimal Spill Code Placement --------*- C++ -*--===// +//===- SpillPlacement.h - Optimal Spill Code Placement ---------*- C++ -*--===// // // The LLVM Compiler Infrastructure // @@ -37,9 +37,9 @@ namespace llvm { class BitVector; class EdgeBundles; -class MachineBasicBlock; -class MachineLoopInfo; class MachineBlockFrequencyInfo; +class MachineFunction; +class MachineLoopInfo; class SpillPlacement : public MachineFunctionPass { struct Node; @@ -47,7 +47,7 @@ class SpillPlacement : public MachineFunctionPass { const EdgeBundles *bundles; const MachineLoopInfo *loops; const MachineBlockFrequencyInfo *MBFI; - Node *nodes; + Node *nodes = nullptr; // Nodes that are active in the current computation. Owned by the prepare() // caller. 
@@ -73,7 +73,7 @@ class SpillPlacement : public MachineFunctionPass { public: static char ID; // Pass identification, replacement for typeid. - SpillPlacement() : MachineFunctionPass(ID), nodes(nullptr) {} + SpillPlacement() : MachineFunctionPass(ID) {} ~SpillPlacement() override { releaseMemory(); } /// BorderConstraint - A basic block has separate constraints for entry and @@ -155,16 +155,16 @@ public: } private: - bool runOnMachineFunction(MachineFunction&) override; - void getAnalysisUsage(AnalysisUsage&) const override; + bool runOnMachineFunction(MachineFunction &mf) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; void releaseMemory() override; - void activate(unsigned); + void activate(unsigned n); void setThreshold(const BlockFrequency &Entry); - bool update(unsigned); + bool update(unsigned n); }; } // end namespace llvm -#endif +#endif // LLVM_LIB_CODEGEN_SPILLPLACEMENT_H diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h index 61ee508c8394..330ee81342b6 100644 --- a/lib/CodeGen/Spiller.h +++ b/lib/CodeGen/Spiller.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/Spiller.h - Spiller -*- C++ -*------------------------===// +//===- llvm/CodeGen/Spiller.h - Spiller -------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -12,11 +12,10 @@ namespace llvm { - class LiveRangeEdit; - class MachineFunction; - class MachineFunctionPass; - class VirtRegMap; - class LiveIntervals; +class LiveRangeEdit; +class MachineFunction; +class MachineFunctionPass; +class VirtRegMap; /// Spiller interface. /// @@ -24,12 +23,14 @@ namespace llvm { /// demand. class Spiller { virtual void anchor(); + public: virtual ~Spiller() = 0; /// spill - Spill the LRE.getParent() live interval. 
virtual void spill(LiveRangeEdit &LRE) = 0; - virtual void postOptimization(){}; + + virtual void postOptimization() {} }; /// Create and return a spiller that will insert spill code directly instead @@ -37,6 +38,7 @@ namespace llvm { Spiller *createInlineSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm); -} -#endif +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_SPILLER_H diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 323045fd2aaa..c99c3b09d88a 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -1,4 +1,4 @@ -//===---------- SplitKit.cpp - Toolkit for splitting live ranges ----------===// +//===- SplitKit.cpp - Toolkit for splitting live ranges -------------------===// // // The LLVM Compiler Infrastructure // @@ -13,20 +13,46 @@ //===----------------------------------------------------------------------===// #include "SplitKit.h" +#include "LiveRangeCalc.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveRangeEdit.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/VirtRegMap.h" 
+#include "llvm/IR/DebugLoc.h" +#include "llvm/MC/LaneBitmask.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/BlockFrequency.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/MathExtras.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" +#include <algorithm> +#include <cassert> +#include <iterator> +#include <limits> +#include <tuple> +#include <utility> using namespace llvm; @@ -125,8 +151,7 @@ InsertPointAnalysis::getLastInsertPointIter(const LiveInterval &CurLI, SplitAnalysis::SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis, const MachineLoopInfo &mli) : MF(vrm.getMachineFunction()), VRM(vrm), LIS(lis), Loops(mli), - TII(*MF.getSubtarget().getInstrInfo()), CurLI(nullptr), - IPA(lis, MF.getNumBlockIDs()) {} + TII(*MF.getSubtarget().getInstrInfo()), IPA(lis, MF.getNumBlockIDs()) {} void SplitAnalysis::clear() { UseSlots.clear(); @@ -200,7 +225,7 @@ bool SplitAnalysis::calcLiveBlockInfo() { // Loop over basic blocks where CurLI is live. 
MachineFunction::iterator MFI = LIS.getMBBFromIndex(LVI->start)->getIterator(); - for (;;) { + while (true) { BlockInfo BI; BI.MBB = &*MFI; SlotIndex Start, Stop; @@ -301,7 +326,7 @@ unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const { MachineFunction::const_iterator MFI = LIS.getMBBFromIndex(LVI->start)->getIterator(); SlotIndex Stop = LIS.getMBBEndIdx(&*MFI); - for (;;) { + while (true) { ++Count; LVI = li->advanceTo(LVI, Stop); if (LVI == LVE) @@ -333,7 +358,6 @@ void SplitAnalysis::analyze(const LiveInterval *li) { analyzeUses(); } - //===----------------------------------------------------------------------===// // Split Editor //===----------------------------------------------------------------------===// @@ -347,8 +371,7 @@ SplitEditor::SplitEditor(SplitAnalysis &sa, AliasAnalysis &aa, MRI(vrm.getMachineFunction().getRegInfo()), MDT(mdt), TII(*vrm.getMachineFunction().getSubtarget().getInstrInfo()), TRI(*vrm.getMachineFunction().getSubtarget().getRegisterInfo()), - MBFI(mbfi), Edit(nullptr), OpenIdx(0), SpillMode(SM_Partition), - RegAssign(Allocator) {} + MBFI(mbfi), RegAssign(Allocator) {} void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) { Edit = &LRE; @@ -552,7 +575,7 @@ SlotIndex SplitEditor::buildCopy(unsigned FromReg, unsigned ToReg, if ((SubRegMask & ~LaneMask).any()) continue; - unsigned PopCount = countPopulation(SubRegMask.getAsInteger()); + unsigned PopCount = SubRegMask.getNumLanes(); PossibleIndexes.push_back(Idx); if (PopCount > BestCover) { BestCover = PopCount; @@ -572,7 +595,7 @@ SlotIndex SplitEditor::buildCopy(unsigned FromReg, unsigned ToReg, LaneBitmask LanesLeft = LaneMask & ~(TRI.getSubRegIndexLaneMask(BestIdx)); while (LanesLeft.any()) { unsigned BestIdx = 0; - int BestCover = INT_MIN; + int BestCover = std::numeric_limits<int>::min(); for (unsigned Idx : PossibleIndexes) { LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(Idx); // Early exit if we found a perfect match. 
@@ -583,8 +606,8 @@ SlotIndex SplitEditor::buildCopy(unsigned FromReg, unsigned ToReg, // Try to cover as much of the remaining lanes as possible but // as few of the already covered lanes as possible. - int Cover = countPopulation((SubRegMask & LanesLeft).getAsInteger()) - - countPopulation((SubRegMask & ~LanesLeft).getAsInteger()); + int Cover = (SubRegMask & LanesLeft).getNumLanes() + - (SubRegMask & ~LanesLeft).getNumLanes(); if (Cover > BestCover) { BestCover = Cover; BestIdx = Idx; @@ -706,7 +729,8 @@ SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) { assert(OpenIdx && "openIntv not called before enterIntvAtEnd"); SlotIndex End = LIS.getMBBEndIdx(&MBB); SlotIndex Last = End.getPrevSlot(); - DEBUG(dbgs() << " enterIntvAtEnd BB#" << MBB.getNumber() << ", " << Last); + DEBUG(dbgs() << " enterIntvAtEnd " << printMBBReference(MBB) << ", " + << Last); VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Last); if (!ParentVNI) { DEBUG(dbgs() << ": not live\n"); @@ -785,7 +809,8 @@ SlotIndex SplitEditor::leaveIntvBefore(SlotIndex Idx) { SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) { assert(OpenIdx && "openIntv not called before leaveIntvAtTop"); SlotIndex Start = LIS.getMBBStartIdx(&MBB); - DEBUG(dbgs() << " leaveIntvAtTop BB#" << MBB.getNumber() << ", " << Start); + DEBUG(dbgs() << " leaveIntvAtTop " << printMBBReference(MBB) << ", " + << Start); VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start); if (!ParentVNI) { @@ -875,23 +900,23 @@ SplitEditor::findShallowDominator(MachineBasicBlock *MBB, // Best candidate so far. MachineBasicBlock *BestMBB = MBB; - unsigned BestDepth = UINT_MAX; + unsigned BestDepth = std::numeric_limits<unsigned>::max(); - for (;;) { + while (true) { const MachineLoop *Loop = Loops.getLoopFor(MBB); // MBB isn't in a loop, it doesn't get any better. All dominators have a // higher frequency by definition. 
if (!Loop) { - DEBUG(dbgs() << "Def in BB#" << DefMBB->getNumber() << " dominates BB#" - << MBB->getNumber() << " at depth 0\n"); + DEBUG(dbgs() << "Def in " << printMBBReference(*DefMBB) << " dominates " + << printMBBReference(*MBB) << " at depth 0\n"); return MBB; } // We'll never be able to exit the DefLoop. if (Loop == DefLoop) { - DEBUG(dbgs() << "Def in BB#" << DefMBB->getNumber() << " dominates BB#" - << MBB->getNumber() << " in the same loop\n"); + DEBUG(dbgs() << "Def in " << printMBBReference(*DefMBB) << " dominates " + << printMBBReference(*MBB) << " in the same loop\n"); return MBB; } @@ -900,8 +925,8 @@ SplitEditor::findShallowDominator(MachineBasicBlock *MBB, if (Depth < BestDepth) { BestMBB = MBB; BestDepth = Depth; - DEBUG(dbgs() << "Def in BB#" << DefMBB->getNumber() << " dominates BB#" - << MBB->getNumber() << " at depth " << Depth << '\n'); + DEBUG(dbgs() << "Def in " << printMBBReference(*DefMBB) << " dominates " + << printMBBReference(*MBB) << " at depth " << Depth << '\n'); } // Leave loop by going to the immediate dominator of the loop header. @@ -978,7 +1003,7 @@ void SplitEditor::hoistCopies() { // Track the nearest common dominator for all back-copies for each ParentVNI, // indexed by ParentVNI->id. - typedef std::pair<MachineBasicBlock*, SlotIndex> DomPair; + using DomPair = std::pair<MachineBasicBlock *, SlotIndex>; SmallVector<DomPair, 8> NearestDom(Parent->getNumValNums()); // The total cost of all the back-copies for each ParentVNI. 
SmallVector<BlockFrequency, 8> Costs(Parent->getNumValNums()); @@ -1040,7 +1065,7 @@ void SplitEditor::hoistCopies() { DEBUG(dbgs() << "Multi-mapped complement " << VNI->id << '@' << VNI->def << " for parent " << ParentVNI->id << '@' << ParentVNI->def - << " hoist to BB#" << Dom.first->getNumber() << ' ' + << " hoist to " << printMBBReference(*Dom.first) << ' ' << Dom.second << '\n'); } @@ -1088,7 +1113,6 @@ void SplitEditor::hoistCopies() { removeBackCopies(BackCopies); } - /// transferValues - Transfer all possible values to the new live ranges. /// Values that were rematerialized are left alone, they need LRCalc.extend(). bool SplitEditor::transferValues() { @@ -1118,7 +1142,7 @@ bool SplitEditor::transferValues() { // The interval [Start;End) is continuously mapped to RegIdx, ParentVNI. DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx - << '(' << PrintReg(Edit->get(RegIdx)) << ')'); + << '(' << printReg(Edit->get(RegIdx)) << ')'); LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx)); // Check for a simply defined value that can be blitted directly. @@ -1151,7 +1175,7 @@ bool SplitEditor::transferValues() { if (Start != BlockStart) { VNInfo *VNI = LI.extendInBlock(BlockStart, std::min(BlockEnd, End)); assert(VNI && "Missing def for complex mapped value"); - DEBUG(dbgs() << ':' << VNI->id << "*BB#" << MBB->getNumber()); + DEBUG(dbgs() << ':' << VNI->id << "*" << printMBBReference(*MBB)); // MBB has its own def. Is it also live-out? if (BlockEnd <= End) LRC.setLiveOutValue(&*MBB, VNI); @@ -1164,7 +1188,7 @@ bool SplitEditor::transferValues() { // Handle the live-in blocks covered by [Start;End). assert(Start <= BlockStart && "Expected live-in block"); while (BlockStart < End) { - DEBUG(dbgs() << ">BB#" << MBB->getNumber()); + DEBUG(dbgs() << ">" << printMBBReference(*MBB)); BlockEnd = LIS.getMBBEndIdx(&*MBB); if (BlockStart == ParentVNI->def) { // This block has the def of a parent PHI, so it isn't live-in. 
@@ -1276,6 +1300,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { struct ExtPoint { ExtPoint(const MachineOperand &O, unsigned R, SlotIndex N) : MO(O), RegIdx(R), Next(N) {} + MachineOperand MO; unsigned RegIdx; SlotIndex Next; @@ -1306,7 +1331,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { unsigned RegIdx = RegAssign.lookup(Idx); LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx)); MO.setReg(LI.reg); - DEBUG(dbgs() << " rewr BB#" << MI->getParent()->getNumber() << '\t' + DEBUG(dbgs() << " rewr " << printMBBReference(*MI->getParent()) << '\t' << Idx << ':' << RegIdx << '\t' << *MI); // Extend liveness to Idx if the instruction reads reg. @@ -1352,9 +1377,9 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { continue; // The problem here can be that the new register may have been created // for a partially defined original register. For example: - // %vreg827:subreg_hireg<def,read-undef> = ... + // %0:subreg_hireg<def,read-undef> = ... // ... - // %vreg828<def> = COPY %vreg827 + // %1 = COPY %0 if (S.empty()) continue; SubLRC.reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT, @@ -1486,7 +1511,6 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { assert(!LRMap || LRMap->size() == Edit->size()); } - //===----------------------------------------------------------------------===// // Single Block Splitting //===----------------------------------------------------------------------===// @@ -1524,7 +1548,6 @@ void SplitEditor::splitSingleBlock(const SplitAnalysis::BlockInfo &BI) { } } - //===----------------------------------------------------------------------===// // Global Live Range Splitting Support //===----------------------------------------------------------------------===// @@ -1542,9 +1565,9 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum, SlotIndex Start, Stop; std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(MBBNum); - DEBUG(dbgs() << "BB#" << MBBNum << " [" << Start << ';' << Stop - << 
") intf " << LeaveBefore << '-' << EnterAfter - << ", live-through " << IntvIn << " -> " << IntvOut); + DEBUG(dbgs() << "%bb." << MBBNum << " [" << Start << ';' << Stop << ") intf " + << LeaveBefore << '-' << EnterAfter << ", live-through " + << IntvIn << " -> " << IntvOut); assert((IntvIn || IntvOut) && "Use splitSingleBlock for isolated blocks"); @@ -1639,13 +1662,12 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum, assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference"); } - void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI, unsigned IntvIn, SlotIndex LeaveBefore) { SlotIndex Start, Stop; std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); - DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop + DEBUG(dbgs() << printMBBReference(*BI.MBB) << " [" << Start << ';' << Stop << "), uses " << BI.FirstInstr << '-' << BI.LastInstr << ", reg-in " << IntvIn << ", leave before " << LeaveBefore << (BI.LiveOut ? ", stack-out" : ", killed in block")); @@ -1737,7 +1759,7 @@ void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI, SlotIndex Start, Stop; std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); - DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop + DEBUG(dbgs() << printMBBReference(*BI.MBB) << " [" << Start << ';' << Stop << "), uses " << BI.FirstInstr << '-' << BI.LastInstr << ", reg-out " << IntvOut << ", enter after " << EnterAfter << (BI.LiveIn ? 
", stack-in" : ", defined in block")); diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h index 9d409e924a3d..c0608893d4e5 100644 --- a/lib/CodeGen/SplitKit.h +++ b/lib/CodeGen/SplitKit.h @@ -1,4 +1,4 @@ -//===-------- SplitKit.h - Toolkit for splitting live ranges ----*- C++ -*-===// +//===- SplitKit.h - Toolkit for splitting live ranges -----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -17,26 +17,32 @@ #include "LiveRangeCalc.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/IntervalMap.h" +#include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/MC/LaneBitmask.h" +#include "llvm/Support/Compiler.h" +#include <utility> namespace llvm { -class ConnectedVNInfoEqClasses; -class LiveInterval; class LiveIntervals; class LiveRangeEdit; class MachineBlockFrequencyInfo; -class MachineInstr; +class MachineDominatorTree; class MachineLoopInfo; class MachineRegisterInfo; class TargetInstrInfo; class TargetRegisterInfo; class VirtRegMap; -class VNInfo; -class raw_ostream; /// Determines the latest safe point in a block in which we can insert a split, /// spill or other instruction related with CurLI. @@ -116,7 +122,7 @@ public: private: // Current live interval. - const LiveInterval *CurLI; + const LiveInterval *CurLI = nullptr; /// Insert Point Analysis. InsertPointAnalysis IPA; @@ -200,7 +206,7 @@ public: /// analyze(li). 
unsigned countLiveBlocks(const LiveInterval *li) const; - typedef SmallPtrSet<const MachineBasicBlock*, 16> BlockPtrSet; + using BlockPtrSet = SmallPtrSet<const MachineBasicBlock *, 16>; /// shouldSplitSingleBlock - Returns true if it would help to create a local /// live range for the instructions in BI. There is normally no benefit to @@ -221,7 +227,6 @@ public: } }; - /// SplitEditor - Edit machine code and LiveIntervals for live range /// splitting. /// @@ -245,7 +250,6 @@ class LLVM_LIBRARY_VISIBILITY SplitEditor { const MachineBlockFrequencyInfo &MBFI; public: - /// ComplementSpillMode - Select how the complement live range should be /// created. SplitEditor automatically creates interval 0 to contain /// anything that isn't added to another interval. This complement interval @@ -273,19 +277,18 @@ public: }; private: - /// Edit - The current parent register and new intervals created. - LiveRangeEdit *Edit; + LiveRangeEdit *Edit = nullptr; /// Index into Edit of the currently open interval. /// The index 0 is used for the complement, so the first interval started by /// openIntv will be 1. - unsigned OpenIdx; + unsigned OpenIdx = 0; /// The current spill mode, selected by reset(). - ComplementSpillMode SpillMode; + ComplementSpillMode SpillMode = SM_Partition; - typedef IntervalMap<SlotIndex, unsigned> RegAssignMap; + using RegAssignMap = IntervalMap<SlotIndex, unsigned>; /// Allocator for the interval map. This will eventually be shared with /// SlotIndexes and LiveIntervals. @@ -296,8 +299,8 @@ private: /// Idx. RegAssignMap RegAssign; - typedef PointerIntPair<VNInfo*, 1> ValueForcePair; - typedef DenseMap<std::pair<unsigned, unsigned>, ValueForcePair> ValueMap; + using ValueForcePair = PointerIntPair<VNInfo *, 1>; + using ValueMap = DenseMap<std::pair<unsigned, unsigned>, ValueForcePair>; /// Values - keep track of the mapping from parent values to values in the new /// intervals. 
Given a pair (RegIdx, ParentVNI->id), Values contains: @@ -419,9 +422,9 @@ private: public: /// Create a new SplitEditor for editing the LiveInterval analyzed by SA. /// Newly created intervals will be appended to newIntervals. - SplitEditor(SplitAnalysis &SA, AliasAnalysis &AA, LiveIntervals&, - VirtRegMap&, MachineDominatorTree&, - MachineBlockFrequencyInfo &); + SplitEditor(SplitAnalysis &sa, AliasAnalysis &aa, LiveIntervals &lis, + VirtRegMap &vrm, MachineDominatorTree &mdt, + MachineBlockFrequencyInfo &mbfi); /// reset - Prepare for a new split. void reset(LiveRangeEdit&, ComplementSpillMode = SM_Partition); @@ -536,6 +539,6 @@ public: unsigned IntvOut, SlotIndex EnterAfter); }; -} +} // end namespace llvm -#endif +#endif // LLVM_LIB_CODEGEN_SPLITKIT_H diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp index e5fc5402cb41..608845498b48 100644 --- a/lib/CodeGen/StackColoring.cpp +++ b/lib/CodeGen/StackColoring.cpp @@ -1,4 +1,4 @@ -//===-- StackColoring.cpp -------------------------------------------------===// +//===- StackColoring.cpp --------------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -22,35 +22,44 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" 
+#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/StackProtector.h" +#include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/WinEHFuncInfo.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Module.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include <algorithm> +#include <cassert> +#include <limits> +#include <memory> +#include <utility> using namespace llvm; @@ -366,6 +375,7 @@ STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region"); // namespace { + /// StackColoring - A machine pass for merging disjoint stack allocations, /// marked by the LIFETIME_START and LIFETIME_END pseudo instructions. class StackColoring : public MachineFunctionPass { @@ -378,32 +388,40 @@ class StackColoring : public MachineFunctionPass { struct BlockLifetimeInfo { /// Which slots BEGINs in each basic block. BitVector Begin; + /// Which slots ENDs in each basic block. BitVector End; + /// Which slots are marked as LIVE_IN, coming into each basic block. BitVector LiveIn; + /// Which slots are marked as LIVE_OUT, coming out of each basic block. BitVector LiveOut; }; /// Maps active slots (per bit) for each basic block. 
- typedef DenseMap<const MachineBasicBlock*, BlockLifetimeInfo> LivenessMap; + using LivenessMap = DenseMap<const MachineBasicBlock *, BlockLifetimeInfo>; LivenessMap BlockLiveness; /// Maps serial numbers to basic blocks. - DenseMap<const MachineBasicBlock*, int> BasicBlocks; + DenseMap<const MachineBasicBlock *, int> BasicBlocks; + /// Maps basic blocks to a serial number. - SmallVector<const MachineBasicBlock*, 8> BasicBlockNumbering; + SmallVector<const MachineBasicBlock *, 8> BasicBlockNumbering; /// Maps slots to their use interval. Outside of this interval, slots /// values are either dead or `undef` and they will not be written to. SmallVector<std::unique_ptr<LiveInterval>, 16> Intervals; + /// Maps slots to the points where they can become in-use. SmallVector<SmallVector<SlotIndex, 4>, 16> LiveStarts; + /// VNInfo is used for the construction of LiveIntervals. VNInfo::Allocator VNInfoAllocator; + /// SlotIndex analysis object. SlotIndexes *Indexes; + /// The stack protector object. StackProtector *SP; @@ -424,13 +442,18 @@ class StackColoring : public MachineFunctionPass { public: static char ID; + StackColoring() : MachineFunctionPass(ID) { initializeStackColoringPass(*PassRegistry::getPassRegistry()); } + void getAnalysisUsage(AnalysisUsage &AU) const override; bool runOnMachineFunction(MachineFunction &MF) override; private: + /// Used in collectMarkers + using BlockBitVecMap = DenseMap<const MachineBasicBlock *, BitVector>; + /// Debug. void dump() const; void dumpIntervals() const; @@ -489,13 +512,12 @@ private: /// Map entries which point to other entries to their destination. /// A->B->C becomes A->C. 
void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots); - - /// Used in collectMarkers - typedef DenseMap<const MachineBasicBlock*, BitVector> BlockBitVecMap; }; + } // end anonymous namespace char StackColoring::ID = 0; + char &llvm::StackColoringID = StackColoring::ID; INITIALIZE_PASS_BEGIN(StackColoring, DEBUG_TYPE, @@ -559,16 +581,13 @@ static inline int getStartOrEndSlot(const MachineInstr &MI) return -1; } -// // At the moment the only way to end a variable lifetime is with // a VARIABLE_LIFETIME op (which can't contain a start). If things // change and the IR allows for a single inst that both begins // and ends lifetime(s), this interface will need to be reworked. -// bool StackColoring::isLifetimeStartOrEnd(const MachineInstr &MI, SmallVector<int, 4> &slots, - bool &isStart) -{ + bool &isStart) { if (MI.getOpcode() == TargetOpcode::LIFETIME_START || MI.getOpcode() == TargetOpcode::LIFETIME_END) { int Slot = getStartOrEndSlot(MI); @@ -608,8 +627,7 @@ bool StackColoring::isLifetimeStartOrEnd(const MachineInstr &MI, return false; } -unsigned StackColoring::collectMarkers(unsigned NumSlot) -{ +unsigned StackColoring::collectMarkers(unsigned NumSlot) { unsigned MarkersFound = 0; BlockBitVecMap SeenStartMap; InterestingSlots.clear(); @@ -624,7 +642,6 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) // Step 1: collect markers and populate the "InterestingSlots" // and "ConservativeSlots" sets. for (MachineBasicBlock *MBB : depth_first(MF)) { - // Compute the set of slots for which we've seen a START marker but have // not yet seen an END marker at this point in the walk (e.g. on entry // to this bb). @@ -697,7 +714,6 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) // NOTE: We use a depth-first iteration to ensure that we obtain a // deterministic numbering. for (MachineBasicBlock *MBB : depth_first(MF)) { - // Assign a serial number to this basic block. 
BasicBlocks[MBB] = BasicBlockNumbering.size(); BasicBlockNumbering.push_back(MBB); @@ -723,7 +739,7 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) } else { for (auto Slot : slots) { DEBUG(dbgs() << "Found a use of slot #" << Slot); - DEBUG(dbgs() << " at BB#" << MBB->getNumber() << " index "); + DEBUG(dbgs() << " at " << printMBBReference(*MBB) << " index "); DEBUG(Indexes->getInstructionIndex(MI).print(dbgs())); const AllocaInst *Allocation = MFI->getObjectAllocation(Slot); if (Allocation) { @@ -745,8 +761,7 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) return MarkersFound; } -void StackColoring::calculateLocalLiveness() -{ +void StackColoring::calculateLocalLiveness() { unsigned NumIters = 0; bool changed = true; while (changed) { @@ -754,7 +769,6 @@ void StackColoring::calculateLocalLiveness() ++NumIters; for (const MachineBasicBlock *BB : BasicBlockNumbering) { - // Use an iterator to avoid repeated lookups. LivenessMap::iterator BI = BlockLiveness.find(BB); assert(BI != BlockLiveness.end() && "Block not found"); @@ -792,7 +806,7 @@ void StackColoring::calculateLocalLiveness() BlockInfo.LiveOut |= LocalLiveOut; } } - }// while changed. + } // while changed. NumIterations = NumIters; } @@ -818,7 +832,6 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) { // Create the interval for the basic blocks containing lifetime begin/end. 
for (const MachineInstr &MI : MBB) { - SmallVector<int, 4> slots; bool IsStart = false; if (!isLifetimeStartOrEnd(MI, slots, IsStart)) @@ -1047,7 +1060,7 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { if (WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo()) for (WinEHTryBlockMapEntry &TBME : EHInfo->TryBlockMap) for (WinEHHandlerType &H : TBME.HandlerArray) - if (H.CatchObj.FrameIndex != INT_MAX && + if (H.CatchObj.FrameIndex != std::numeric_limits<int>::max() && SlotRemap.count(H.CatchObj.FrameIndex)) H.CatchObj.FrameIndex = SlotRemap[H.CatchObj.FrameIndex]; @@ -1116,8 +1129,7 @@ void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap, bool StackColoring::runOnMachineFunction(MachineFunction &Func) { DEBUG(dbgs() << "********** Stack Coloring **********\n" - << "********** Function: " - << ((const Value*)Func.getFunction())->getName() << '\n'); + << "********** Function: " << Func.getName() << '\n'); MF = &Func; MFI = &MF->getFrameInfo(); Indexes = &getAnalysis<SlotIndexes>(); @@ -1157,7 +1169,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { // Don't continue because there are not enough lifetime markers, or the // stack is too small, or we are told not to optimize the slots. if (NumMarkers < 2 || TotalSize < 16 || DisableColoring || - skipFunction(*Func.getFunction())) { + skipFunction(Func.getFunction())) { DEBUG(dbgs()<<"Will not try to merge slots.\n"); return removeAllMarkers(); } @@ -1231,7 +1243,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { LiveInterval *Second = &*Intervals[SecondSlot]; auto &FirstS = LiveStarts[FirstSlot]; auto &SecondS = LiveStarts[SecondSlot]; - assert (!First->empty() && !Second->empty() && "Found an empty range"); + assert(!First->empty() && !Second->empty() && "Found an empty range"); // Merge disjoint slots. This is a little bit tricky - see the // Implementation Notes section for an explanation. 
diff --git a/lib/CodeGen/StackMapLivenessAnalysis.cpp b/lib/CodeGen/StackMapLivenessAnalysis.cpp index a5ef7c8229f5..cc9af92c395f 100644 --- a/lib/CodeGen/StackMapLivenessAnalysis.cpp +++ b/lib/CodeGen/StackMapLivenessAnalysis.cpp @@ -19,10 +19,10 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp index b4fa29d9a86b..e66a25bec911 100644 --- a/lib/CodeGen/StackMaps.cpp +++ b/lib/CodeGen/StackMaps.cpp @@ -16,6 +16,9 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -27,9 +30,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetOpcodes.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -41,7 +41,7 @@ using namespace llvm; #define DEBUG_TYPE "stackmaps" static cl::opt<int> StackMapVersion( - "stackmap-version", cl::init(3), + "stackmap-version", cl::init(3), cl::Hidden, cl::desc("Specify the stackmap encoding version (default = 3)")); const char *StackMaps::WSMP = "Stack Maps: "; @@ -193,14 +193,14 @@ void StackMaps::print(raw_ostream &OS) { case Location::Register: OS << "Register "; if (TRI) - OS << TRI->getName(Loc.Reg); + OS << printReg(Loc.Reg, TRI); else OS << Loc.Reg; break; case Location::Direct: OS << "Direct 
"; if (TRI) - OS << TRI->getName(Loc.Reg); + OS << printReg(Loc.Reg, TRI); else OS << Loc.Reg; if (Loc.Offset) @@ -209,7 +209,7 @@ void StackMaps::print(raw_ostream &OS) { case Location::Indirect: OS << "Indirect "; if (TRI) - OS << TRI->getName(Loc.Reg); + OS << printReg(Loc.Reg, TRI); else OS << Loc.Reg; OS << "+" << Loc.Offset; @@ -233,7 +233,7 @@ void StackMaps::print(raw_ostream &OS) { for (const auto &LO : LiveOuts) { OS << WSMP << "\t\tLO " << Idx << ": "; if (TRI) - OS << TRI->getName(LO.Reg); + OS << printReg(LO.Reg, TRI); else OS << LO.Reg; OS << "\t[encoding: .short " << LO.DwarfRegNum << ", .byte 0, .byte " diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index d8e7840a2576..62cef95a4af2 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -14,14 +14,16 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/StackProtector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/EHPersonalities.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/StackProtector.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" @@ -42,10 +44,8 @@ #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <utility> using namespace llvm; @@ -247,10 +247,12 @@ bool StackProtector::RequiresStackProtector() { OptimizationRemarkEmitter ORE(F); if 
(F->hasFnAttribute(Attribute::StackProtectReq)) { - ORE.emit(OptimizationRemark(DEBUG_TYPE, "StackProtectorRequested", F) + ORE.emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "StackProtectorRequested", F) << "Stack protection applied to function " << ore::NV("Function", F) - << " due to a function attribute or command-line switch"); + << " due to a function attribute or command-line switch"; + }); NeedsProtector = true; Strong = true; // Use the same heuristic as strong to determine SSPLayout } else if (F->hasFnAttribute(Attribute::StackProtectStrong)) @@ -264,29 +266,31 @@ bool StackProtector::RequiresStackProtector() { for (const Instruction &I : BB) { if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) { if (AI->isArrayAllocation()) { - OptimizationRemark Remark(DEBUG_TYPE, "StackProtectorAllocaOrArray", - &I); - Remark - << "Stack protection applied to function " - << ore::NV("Function", F) - << " due to a call to alloca or use of a variable length array"; + auto RemarkBuilder = [&]() { + return OptimizationRemark(DEBUG_TYPE, "StackProtectorAllocaOrArray", + &I) + << "Stack protection applied to function " + << ore::NV("Function", F) + << " due to a call to alloca or use of a variable length " + "array"; + }; if (const auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) { if (CI->getLimitedValue(SSPBufferSize) >= SSPBufferSize) { // A call to alloca with size >= SSPBufferSize requires // stack protectors. Layout.insert(std::make_pair(AI, SSPLK_LargeArray)); - ORE.emit(Remark); + ORE.emit(RemarkBuilder); NeedsProtector = true; } else if (Strong) { // Require protectors for all alloca calls in strong mode. Layout.insert(std::make_pair(AI, SSPLK_SmallArray)); - ORE.emit(Remark); + ORE.emit(RemarkBuilder); NeedsProtector = true; } } else { // A call to alloca with a variable size requires protectors. 
Layout.insert(std::make_pair(AI, SSPLK_LargeArray)); - ORE.emit(Remark); + ORE.emit(RemarkBuilder); NeedsProtector = true; } continue; @@ -296,11 +300,13 @@ bool StackProtector::RequiresStackProtector() { if (ContainsProtectableArray(AI->getAllocatedType(), IsLarge, Strong)) { Layout.insert(std::make_pair(AI, IsLarge ? SSPLK_LargeArray : SSPLK_SmallArray)); - ORE.emit(OptimizationRemark(DEBUG_TYPE, "StackProtectorBuffer", &I) + ORE.emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "StackProtectorBuffer", &I) << "Stack protection applied to function " << ore::NV("Function", F) << " due to a stack allocated buffer or struct containing a " - "buffer"); + "buffer"; + }); NeedsProtector = true; continue; } @@ -308,11 +314,13 @@ bool StackProtector::RequiresStackProtector() { if (Strong && HasAddressTaken(AI)) { ++NumAddrTaken; Layout.insert(std::make_pair(AI, SSPLK_AddrOf)); - ORE.emit( - OptimizationRemark(DEBUG_TYPE, "StackProtectorAddressTaken", &I) - << "Stack protection applied to function " - << ore::NV("Function", F) - << " due to the address of a local variable being taken"); + ORE.emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "StackProtectorAddressTaken", + &I) + << "Stack protection applied to function " + << ore::NV("Function", F) + << " due to the address of a local variable being taken"; + }); NeedsProtector = true; } } @@ -377,8 +385,12 @@ static bool CreatePrologue(Function *F, Module *M, ReturnInst *RI, /// - The epilogue checks the value stored in the prologue against the original /// value. It calls __stack_chk_fail if they differ. bool StackProtector::InsertStackProtectors() { + // If the target wants to XOR the frame pointer into the guard value, it's + // impossible to emit the check in IR, so the target *must* support stack + // protection in SDAG. 
bool SupportsSelectionDAGSP = - EnableSelectionDAGSP && !TM->Options.EnableFastISel; + TLI->useStackGuardXorFP() || + (EnableSelectionDAGSP && !TM->Options.EnableFastISel); AllocaInst *AI = nullptr; // Place on stack that stores the stack guard. for (Function::iterator I = F->begin(), E = F->end(); I != E;) { diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 856bca19dee8..62f662d1ade4 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -1,4 +1,4 @@ -//===-- StackSlotColoring.cpp - Stack slot coloring pass. -----------------===// +//===- StackSlotColoring.cpp - Stack slot coloring pass. ------------------===// // // The LLVM Compiler Infrastructure // @@ -14,22 +14,34 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/IR/Module.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include 
"llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <iterator> #include <vector> + using namespace llvm; #define DEBUG_TYPE "stack-slot-coloring" @@ -45,6 +57,7 @@ STATISTIC(NumEliminated, "Number of stack slots eliminated due to coloring"); STATISTIC(NumDead, "Number of trivially dead stack accesses eliminated"); namespace { + class StackSlotColoring : public MachineFunctionPass { LiveStacks* LS; MachineFrameInfo *MFI; @@ -73,7 +86,7 @@ namespace { BitVector AllColors; // NextColor - Next "color" that's not yet used. - int NextColor; + int NextColor = -1; // UsedColors - "Colors" that have been assigned. BitVector UsedColors; @@ -83,10 +96,10 @@ namespace { public: static char ID; // Pass identification - StackSlotColoring() : - MachineFunctionPass(ID), NextColor(-1) { - initializeStackSlotColoringPass(*PassRegistry::getPassRegistry()); - } + + StackSlotColoring() : MachineFunctionPass(ID) { + initializeStackSlotColoringPass(*PassRegistry::getPassRegistry()); + } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); @@ -111,9 +124,11 @@ namespace { MachineFunction &MF); bool RemoveDeadStores(MachineBasicBlock* MBB); }; + } // end anonymous namespace char StackSlotColoring::ID = 0; + char &llvm::StackSlotColoringID = StackSlotColoring::ID; INITIALIZE_PASS_BEGIN(StackSlotColoring, DEBUG_TYPE, @@ -125,14 +140,16 @@ INITIALIZE_PASS_END(StackSlotColoring, DEBUG_TYPE, "Stack Slot Coloring", false, false) namespace { - // IntervalSorter - Comparison predicate that sort live intervals by - // their weight. - struct IntervalSorter { - bool operator()(LiveInterval* LHS, LiveInterval* RHS) const { - return LHS->weight > RHS->weight; - } - }; -} + +// IntervalSorter - Comparison predicate that sort live intervals by +// their weight. 
+struct IntervalSorter { + bool operator()(LiveInterval* LHS, LiveInterval* RHS) const { + return LHS->weight > RHS->weight; + } +}; + +} // end anonymous namespace /// ScanForSpillSlotRefs - Scan all the machine instructions for spill slot /// references and update spill slot weights. @@ -185,8 +202,10 @@ void StackSlotColoring::InitializeSlots() { UsedColors.resize(LastFI); Assignments.resize(LastFI); - typedef std::iterator_traits<LiveStacks::iterator>::value_type Pair; + using Pair = std::iterator_traits<LiveStacks::iterator>::value_type; + SmallVector<Pair *, 16> Intervals; + Intervals.reserve(LS->getNumIntervals()); for (auto &I : *LS) Intervals.push_back(&I); @@ -229,10 +248,11 @@ StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const { } /// ColorSlot - Assign a "color" (stack slot) to the specified stack slot. -/// int StackSlotColoring::ColorSlot(LiveInterval *li) { int Color = -1; bool Share = false; + int FI = TargetRegisterInfo::stackSlot2Index(li->reg); + if (!DisableSharing) { // Check if it's possible to reuse any of the used colors. Color = UsedColors.find_first(); @@ -246,6 +266,11 @@ int StackSlotColoring::ColorSlot(LiveInterval *li) { } } + if (Color != -1 && MFI->getStackID(Color) != MFI->getStackID(FI)) { + DEBUG(dbgs() << "cannot share FIs with different stack IDs\n"); + Share = false; + } + // Assign it to the first available color (assumed to be the best) if it's // not possible to share a used color with other objects. if (!Share) { @@ -257,7 +282,6 @@ int StackSlotColoring::ColorSlot(LiveInterval *li) { // Record the assignment. Assignments[Color].push_back(li); - int FI = TargetRegisterInfo::stackSlot2Index(li->reg); DEBUG(dbgs() << "Assigning fi#" << FI << " to fi#" << Color << "\n"); // Change size and alignment of the allocated slot. If there are multiple @@ -364,7 +388,6 @@ void StackSlotColoring::RewriteInstruction(MachineInstr &MI, // The MachineMemOperands have already been updated. 
} - /// RemoveDeadStores - Scan through a basic block and look for loads followed /// by stores. If they're both using the same stack slot, then the store is /// definitely dead. This could obviously be much more aggressive (consider @@ -426,7 +449,6 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { return changed; } - bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { DEBUG({ dbgs() << "********** Stack Slot Coloring **********\n" diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index 489a607eb176..df1eebf43b2b 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -15,6 +15,8 @@ #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TailDuplicator.h" #include "llvm/Pass.h" @@ -47,12 +49,15 @@ char &llvm::TailDuplicateID = TailDuplicatePass::ID; INITIALIZE_PASS(TailDuplicatePass, DEBUG_TYPE, "Tail Duplication", false, false) bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; auto MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); - Duplicator.initMF(MF, MBPI, /* LayoutMode */ false); + // TODO: Querying isSSA() to determine pre-/post-regalloc is fragile, better + // split this into two passes instead. 
+ bool PreRegAlloc = MF.getRegInfo().isSSA(); + Duplicator.initMF(MF, PreRegAlloc, MBPI, /* LayoutMode */ false); bool MadeChange = false; while (Duplicator.tailDuplicateBlocks()) diff --git a/lib/CodeGen/TailDuplicator.cpp b/lib/CodeGen/TailDuplicator.cpp index dc7265dcf6c2..f51c884839b3 100644 --- a/lib/CodeGen/TailDuplicator.cpp +++ b/lib/CodeGen/TailDuplicator.cpp @@ -12,13 +12,14 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/TailDuplicator.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -27,16 +28,15 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineSSAUpdater.h" -#include "llvm/CodeGen/TailDuplicator.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cassert> #include <iterator> @@ -75,7 +75,7 @@ static cl::opt<bool> static cl::opt<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U), cl::Hidden); -void TailDuplicator::initMF(MachineFunction &MFin, +void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc, const MachineBranchProbabilityInfo *MBPIin, bool LayoutModeIn, unsigned TailDupSizeIn) { MF = &MFin; @@ 
-89,7 +89,7 @@ void TailDuplicator::initMF(MachineFunction &MFin, assert(MBPI != nullptr && "Machine Branch Probability Info required"); LayoutMode = LayoutModeIn; - PreRegAlloc = MRI->isSSA(); + this->PreRegAlloc = PreRegAlloc; } static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { @@ -111,9 +111,10 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { } } if (!Found) { - dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; - dbgs() << " missing input from predecessor BB#" - << PredBB->getNumber() << '\n'; + dbgs() << "Malformed PHI in " << printMBBReference(*MBB) << ": " + << *MI; + dbgs() << " missing input from predecessor " + << printMBBReference(*PredBB) << '\n'; llvm_unreachable(nullptr); } } @@ -121,15 +122,16 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB(); if (CheckExtra && !Preds.count(PHIBB)) { - dbgs() << "Warning: malformed PHI in BB#" << MBB->getNumber() << ": " - << *MI; - dbgs() << " extra input from predecessor BB#" << PHIBB->getNumber() - << '\n'; + dbgs() << "Warning: malformed PHI in " << printMBBReference(*MBB) + << ": " << *MI; + dbgs() << " extra input from predecessor " + << printMBBReference(*PHIBB) << '\n'; llvm_unreachable(nullptr); } if (PHIBB->getNumber() < 0) { - dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; - dbgs() << " non-existing BB#" << PHIBB->getNumber() << '\n'; + dbgs() << "Malformed PHI in " << printMBBReference(*MBB) << ": " + << *MI; + dbgs() << " non-existing " << printMBBReference(*PHIBB) << '\n'; llvm_unreachable(nullptr); } } @@ -369,10 +371,10 @@ void TailDuplicator::duplicateInstruction( MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, DenseMap<unsigned, RegSubRegPair> &LocalVRMap, const DenseSet<unsigned> &UsedByPhi) { - MachineInstr *NewMI = TII->duplicate(*MI, *MF); + MachineInstr &NewMI = 
TII->duplicate(*PredBB, PredBB->end(), *MI); if (PreRegAlloc) { - for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = NewMI->getOperand(i); + for (unsigned i = 0, e = NewMI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = NewMI.getOperand(i); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); @@ -443,7 +445,6 @@ void TailDuplicator::duplicateInstruction( } } } - PredBB->insert(PredBB->instr_end(), NewMI); } /// After FromBB is tail duplicated into its predecessor blocks, the successors @@ -549,7 +550,7 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, unsigned MaxDuplicateCount; if (TailDupSize == 0 && TailDuplicateSize.getNumOccurrences() == 0 && - MF->getFunction()->optForSize()) + MF->getFunction().optForSize()) MaxDuplicateCount = 1; else if (TailDupSize == 0) MaxDuplicateCount = TailDuplicateSize; @@ -784,7 +785,8 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl<MachineBasicBlock *> &TDBBs, SmallVectorImpl<MachineInstr *> &Copies) { - DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); + DEBUG(dbgs() << "\n*** Tail-duplicating " << printMBBReference(*TailBB) + << '\n'); DenseSet<unsigned> UsedByPhi; getRegsUsedByPHIs(*TailBB, &UsedByPhi); @@ -825,10 +827,8 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, // Clone the contents of TailBB into PredBB. DenseMap<unsigned, RegSubRegPair> LocalVRMap; SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos; - // Use instr_iterator here to properly handle bundles, e.g. - // ARM Thumb2 IT block. 
- MachineBasicBlock::instr_iterator I = TailBB->instr_begin(); - while (I != TailBB->instr_end()) { + for (MachineBasicBlock::iterator I = TailBB->begin(), E = TailBB->end(); + I != E; /* empty */) { MachineInstr *MI = &*I; ++I; if (MI->isPHI()) { diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp index 9dd98b4020d2..b2151eb49655 100644 --- a/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -15,16 +15,16 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Compiler.h" -#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -32,7 +32,7 @@ TargetFrameLowering::~TargetFrameLowering() = default; /// The default implementation just looks at attribute "no-frame-pointer-elim". bool TargetFrameLowering::noFramePointerElim(const MachineFunction &MF) const { - auto Attr = MF.getFunction()->getFnAttribute("no-frame-pointer-elim"); + auto Attr = MF.getFunction().getFnAttribute("no-frame-pointer-elim"); return Attr.getValueAsString() == "true"; } @@ -82,7 +82,7 @@ void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF, return; // In Naked functions we aren't going to save any registers. - if (MF.getFunction()->hasFnAttribute(Attribute::Naked)) + if (MF.getFunction().hasFnAttribute(Attribute::Naked)) return; // Functions which call __builtin_unwind_init get all their registers saved. 
@@ -99,7 +99,7 @@ unsigned TargetFrameLowering::getStackAlignmentSkew( const MachineFunction &MF) const { // When HHVM function is called, the stack is skewed as the return address // is removed from the stack before we enter the function. - if (LLVM_UNLIKELY(MF.getFunction()->getCallingConv() == CallingConv::HHVM)) + if (LLVM_UNLIKELY(MF.getFunction().getCallingConv() == CallingConv::HHVM)) return MF.getTarget().getPointerSize(); return 0; diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp index 14c5adc0d898..db925f803db6 100644 --- a/lib/CodeGen/TargetInstrInfo.cpp +++ b/lib/CodeGen/TargetInstrInfo.cpp @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" @@ -19,6 +19,9 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" #include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCAsmInfo.h" @@ -26,10 +29,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" #include <cctype> using namespace llvm; @@ -67,6 +67,11 @@ void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB, llvm_unreachable("Target didn't implement insertNoop!"); } +static bool isAsmComment(const char *Str, const MCAsmInfo &MAI) { + return strncmp(Str, MAI.getCommentString().data(), + MAI.getCommentString().size()) == 0; +} + /// 
Measure the specified inline asm to determine an approximation of its /// length. /// Comments (which run till the next SeparatorString or newline) do not @@ -75,29 +80,46 @@ void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB, /// multiple instructions separated by SeparatorString or newlines. /// Variable-length instructions are not handled here; this function /// may be overloaded in the target code to do that. +/// We implement a special case of the .space directive which takes only a +/// single integer argument in base 10 that is the size in bytes. This is a +/// restricted form of the GAS directive in that we only interpret +/// simple--i.e. not a logical or arithmetic expression--size values without +/// the optional fill value. This is primarily used for creating arbitrary +/// sized inline asm blocks for testing purposes. unsigned TargetInstrInfo::getInlineAsmLength(const char *Str, const MCAsmInfo &MAI) const { // Count the number of instructions in the asm. - bool atInsnStart = true; - unsigned InstCount = 0; + bool AtInsnStart = true; + unsigned Length = 0; for (; *Str; ++Str) { if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(), strlen(MAI.getSeparatorString())) == 0) { - atInsnStart = true; - } else if (strncmp(Str, MAI.getCommentString().data(), - MAI.getCommentString().size()) == 0) { + AtInsnStart = true; + } else if (isAsmComment(Str, MAI)) { // Stop counting as an instruction after a comment until the next // separator. - atInsnStart = false; + AtInsnStart = false; } - if (atInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) { - ++InstCount; - atInsnStart = false; + if (AtInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) { + unsigned AddLength = MAI.getMaxInstLength(); + if (strncmp(Str, ".space", 6) == 0) { + char *EStr; + int SpaceSize; + SpaceSize = strtol(Str + 6, &EStr, 10); + SpaceSize = SpaceSize < 0 ? 
0 : SpaceSize; + while (*EStr != '\n' && std::isspace(static_cast<unsigned char>(*EStr))) + ++EStr; + if (*EStr == '\0' || *EStr == '\n' || + isAsmComment(EStr, MAI)) // Successfully parsed .space argument + AddLength = SpaceSize; + } + Length += AddLength; + AtInsnStart = false; } } - return InstCount * MAI.getMaxInstLength(); + return Length; } /// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything @@ -169,7 +191,7 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI, MachineInstr *CommutedMI = nullptr; if (NewMI) { // Create a new instruction. - MachineFunction &MF = *MI.getParent()->getParent(); + MachineFunction &MF = *MI.getMF(); CommutedMI = MF.CloneMachineInstr(&MI); } else { CommutedMI = &MI; @@ -388,10 +410,11 @@ bool TargetInstrInfo::produceSameValue(const MachineInstr &MI0, return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs); } -MachineInstr *TargetInstrInfo::duplicate(MachineInstr &Orig, - MachineFunction &MF) const { +MachineInstr &TargetInstrInfo::duplicate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) const { assert(!Orig.isNotDuplicable() && "Instruction cannot be duplicated"); - return MF.CloneMachineInstr(&Orig); + MachineFunction &MF = *MBB.getParent(); + return MF.CloneMachineInstrBundle(MBB, InsertBefore, Orig); } // If the COPY instruction in MI can be folded to a stack operation, return @@ -415,7 +438,7 @@ static const TargetRegisterClass *canFoldCopy(const MachineInstr &MI, assert(TargetRegisterInfo::isVirtualRegister(FoldReg) && "Cannot fold physregs"); - const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo(); const TargetRegisterClass *RC = MRI.getRegClass(FoldReg); if (TargetRegisterInfo::isPhysicalRegister(LiveOp.getReg())) @@ -495,21 +518,13 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI, return NewMI; } -/// foldMemoryOperand - 
Attempt to fold a load or store of the specified stack -/// slot into the specified machine instruction for the specified operand(s). -/// If this is possible, a new instruction is returned with the specified -/// operand folded, otherwise NULL is returned. The client is responsible for -/// removing the old instruction and adding the new one in the instruction -/// stream. MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, ArrayRef<unsigned> Ops, int FI, LiveIntervals *LIS) const { auto Flags = MachineMemOperand::MONone; - for (unsigned i = 0, e = Ops.size(); i != e; ++i) - if (MI.getOperand(Ops[i]).isDef()) - Flags |= MachineMemOperand::MOStore; - else - Flags |= MachineMemOperand::MOLoad; + for (unsigned OpIdx : Ops) + Flags |= MI.getOperand(OpIdx).isDef() ? MachineMemOperand::MOStore + : MachineMemOperand::MOLoad; MachineBasicBlock *MBB = MI.getParent(); assert(MBB && "foldMemoryOperand needs an inserted instruction"); @@ -525,10 +540,10 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, if (Flags & MachineMemOperand::MOStore) { MemSize = MFI.getObjectSize(FI); } else { - for (unsigned Idx : Ops) { + for (unsigned OpIdx : Ops) { int64_t OpSize = MFI.getObjectSize(FI); - if (auto SubReg = MI.getOperand(Idx).getSubReg()) { + if (auto SubReg = MI.getOperand(OpIdx).getSubReg()) { unsigned SubRegSize = TRI->getSubRegIdxSize(SubReg); if (SubRegSize > 0 && !(SubRegSize % 8)) OpSize = SubRegSize / 8; @@ -590,6 +605,54 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, return &*--Pos; } +MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, + ArrayRef<unsigned> Ops, + MachineInstr &LoadMI, + LiveIntervals *LIS) const { + assert(LoadMI.canFoldAsLoad() && "LoadMI isn't foldable!"); +#ifndef NDEBUG + for (unsigned OpIdx : Ops) + assert(MI.getOperand(OpIdx).isUse() && "Folding load into def!"); +#endif + + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + + // Ask the 
target to do the actual folding. + MachineInstr *NewMI = nullptr; + int FrameIndex = 0; + + if ((MI.getOpcode() == TargetOpcode::STACKMAP || + MI.getOpcode() == TargetOpcode::PATCHPOINT || + MI.getOpcode() == TargetOpcode::STATEPOINT) && + isLoadFromStackSlot(LoadMI, FrameIndex)) { + // Fold stackmap/patchpoint. + NewMI = foldPatchpoint(MF, MI, Ops, FrameIndex, *this); + if (NewMI) + NewMI = &*MBB.insert(MI, NewMI); + } else { + // Ask the target to do the actual folding. + NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, LoadMI, LIS); + } + + if (!NewMI) + return nullptr; + + // Copy the memoperands from the load to the folded instruction. + if (MI.memoperands_empty()) { + NewMI->setMemRefs(LoadMI.memoperands_begin(), LoadMI.memoperands_end()); + } else { + // Handle the rare case of folding multiple loads. + NewMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + for (MachineInstr::mmo_iterator I = LoadMI.memoperands_begin(), + E = LoadMI.memoperands_end(); + I != E; ++I) { + NewMI->addMemOperand(MF, *I); + } + } + return NewMI; +} + bool TargetInstrInfo::hasReassociableOperands( const MachineInstr &Inst, const MachineBasicBlock *MBB) const { const MachineOperand &Op1 = Inst.getOperand(1); @@ -685,11 +748,13 @@ bool TargetInstrInfo::getMachineCombinerPatterns( return false; } + /// Return true when a code sequence can improve loop throughput. bool TargetInstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const { return false; } + /// Attempt the reassociation transformation to reduce critical path length. /// See the above comments before getMachineCombinerPatterns(). 
void TargetInstrInfo::reassociateOps( @@ -698,7 +763,7 @@ void TargetInstrInfo::reassociateOps( SmallVectorImpl<MachineInstr *> &InsInstrs, SmallVectorImpl<MachineInstr *> &DelInstrs, DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const { - MachineFunction *MF = Root.getParent()->getParent(); + MachineFunction *MF = Root.getMF(); MachineRegisterInfo &MRI = MF->getRegInfo(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); @@ -781,7 +846,7 @@ void TargetInstrInfo::genAlternativeCodeSequence( SmallVectorImpl<MachineInstr *> &InsInstrs, SmallVectorImpl<MachineInstr *> &DelInstrs, DenseMap<unsigned, unsigned> &InstIdxForVirtReg) const { - MachineRegisterInfo &MRI = Root.getParent()->getParent()->getRegInfo(); + MachineRegisterInfo &MRI = Root.getMF()->getRegInfo(); // Select the previous instruction in the sequence based on the input pattern. MachineInstr *Prev = nullptr; @@ -803,59 +868,9 @@ void TargetInstrInfo::genAlternativeCodeSequence( reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg); } -/// foldMemoryOperand - Same as the previous version except it allows folding -/// of any load and store from / to any address, not just from a specific -/// stack slot. -MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, - ArrayRef<unsigned> Ops, - MachineInstr &LoadMI, - LiveIntervals *LIS) const { - assert(LoadMI.canFoldAsLoad() && "LoadMI isn't foldable!"); -#ifndef NDEBUG - for (unsigned i = 0, e = Ops.size(); i != e; ++i) - assert(MI.getOperand(Ops[i]).isUse() && "Folding load into def!"); -#endif - MachineBasicBlock &MBB = *MI.getParent(); - MachineFunction &MF = *MBB.getParent(); - - // Ask the target to do the actual folding. 
- MachineInstr *NewMI = nullptr; - int FrameIndex = 0; - - if ((MI.getOpcode() == TargetOpcode::STACKMAP || - MI.getOpcode() == TargetOpcode::PATCHPOINT || - MI.getOpcode() == TargetOpcode::STATEPOINT) && - isLoadFromStackSlot(LoadMI, FrameIndex)) { - // Fold stackmap/patchpoint. - NewMI = foldPatchpoint(MF, MI, Ops, FrameIndex, *this); - if (NewMI) - NewMI = &*MBB.insert(MI, NewMI); - } else { - // Ask the target to do the actual folding. - NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, LoadMI, LIS); - } - - if (!NewMI) return nullptr; - - // Copy the memoperands from the load to the folded instruction. - if (MI.memoperands_empty()) { - NewMI->setMemRefs(LoadMI.memoperands_begin(), LoadMI.memoperands_end()); - } - else { - // Handle the rare case of folding multiple loads. - NewMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); - for (MachineInstr::mmo_iterator I = LoadMI.memoperands_begin(), - E = LoadMI.memoperands_end(); - I != E; ++I) { - NewMI->addMemOperand(MF, *I); - } - } - return NewMI; -} - bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric( const MachineInstr &MI, AliasAnalysis *AA) const { - const MachineFunction &MF = *MI.getParent()->getParent(); + const MachineFunction &MF = *MI.getMF(); const MachineRegisterInfo &MRI = MF.getRegInfo(); // Remat clients assume operand 0 is the defined register. 
@@ -933,7 +948,7 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric( } int TargetInstrInfo::getSPAdjust(const MachineInstr &MI) const { - const MachineFunction *MF = MI.getParent()->getParent(); + const MachineFunction *MF = MI.getMF(); const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); bool StackGrowsDown = TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 3914ee514712..543c12eebb45 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -1,4 +1,4 @@ -//===-- TargetLoweringBase.cpp - Implement the TargetLoweringBase class ---===// +//===- TargetLoweringBase.cpp - Implement the TargetLoweringBase class ----===// // // The LLVM Compiler Infrastructure // @@ -13,32 +13,55 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" #include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" +#include 
"llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Mangler.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" #include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" -#include <cctype> +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <iterator> +#include <string> +#include <tuple> +#include <utility> + using namespace llvm; static cl::opt<bool> JumpIsExpensiveOverride( @@ -78,186 +101,13 @@ static cl::opt<int> MinPercentageForPredictableBranch( cl::Hidden); /// InitLibcallNames - Set default libcall names. 
-/// static void InitLibcallNames(const char **Names, const Triple &TT) { - Names[RTLIB::SHL_I16] = "__ashlhi3"; - Names[RTLIB::SHL_I32] = "__ashlsi3"; - Names[RTLIB::SHL_I64] = "__ashldi3"; - Names[RTLIB::SHL_I128] = "__ashlti3"; - Names[RTLIB::SRL_I16] = "__lshrhi3"; - Names[RTLIB::SRL_I32] = "__lshrsi3"; - Names[RTLIB::SRL_I64] = "__lshrdi3"; - Names[RTLIB::SRL_I128] = "__lshrti3"; - Names[RTLIB::SRA_I16] = "__ashrhi3"; - Names[RTLIB::SRA_I32] = "__ashrsi3"; - Names[RTLIB::SRA_I64] = "__ashrdi3"; - Names[RTLIB::SRA_I128] = "__ashrti3"; - Names[RTLIB::MUL_I8] = "__mulqi3"; - Names[RTLIB::MUL_I16] = "__mulhi3"; - Names[RTLIB::MUL_I32] = "__mulsi3"; - Names[RTLIB::MUL_I64] = "__muldi3"; - Names[RTLIB::MUL_I128] = "__multi3"; - Names[RTLIB::MULO_I32] = "__mulosi4"; - Names[RTLIB::MULO_I64] = "__mulodi4"; - Names[RTLIB::MULO_I128] = "__muloti4"; - Names[RTLIB::SDIV_I8] = "__divqi3"; - Names[RTLIB::SDIV_I16] = "__divhi3"; - Names[RTLIB::SDIV_I32] = "__divsi3"; - Names[RTLIB::SDIV_I64] = "__divdi3"; - Names[RTLIB::SDIV_I128] = "__divti3"; - Names[RTLIB::UDIV_I8] = "__udivqi3"; - Names[RTLIB::UDIV_I16] = "__udivhi3"; - Names[RTLIB::UDIV_I32] = "__udivsi3"; - Names[RTLIB::UDIV_I64] = "__udivdi3"; - Names[RTLIB::UDIV_I128] = "__udivti3"; - Names[RTLIB::SREM_I8] = "__modqi3"; - Names[RTLIB::SREM_I16] = "__modhi3"; - Names[RTLIB::SREM_I32] = "__modsi3"; - Names[RTLIB::SREM_I64] = "__moddi3"; - Names[RTLIB::SREM_I128] = "__modti3"; - Names[RTLIB::UREM_I8] = "__umodqi3"; - Names[RTLIB::UREM_I16] = "__umodhi3"; - Names[RTLIB::UREM_I32] = "__umodsi3"; - Names[RTLIB::UREM_I64] = "__umoddi3"; - Names[RTLIB::UREM_I128] = "__umodti3"; - - Names[RTLIB::NEG_I32] = "__negsi2"; - Names[RTLIB::NEG_I64] = "__negdi2"; - Names[RTLIB::ADD_F32] = "__addsf3"; - Names[RTLIB::ADD_F64] = "__adddf3"; - Names[RTLIB::ADD_F80] = "__addxf3"; - Names[RTLIB::ADD_F128] = "__addtf3"; - Names[RTLIB::ADD_PPCF128] = "__gcc_qadd"; - Names[RTLIB::SUB_F32] = "__subsf3"; - Names[RTLIB::SUB_F64] = "__subdf3"; - 
Names[RTLIB::SUB_F80] = "__subxf3"; - Names[RTLIB::SUB_F128] = "__subtf3"; - Names[RTLIB::SUB_PPCF128] = "__gcc_qsub"; - Names[RTLIB::MUL_F32] = "__mulsf3"; - Names[RTLIB::MUL_F64] = "__muldf3"; - Names[RTLIB::MUL_F80] = "__mulxf3"; - Names[RTLIB::MUL_F128] = "__multf3"; - Names[RTLIB::MUL_PPCF128] = "__gcc_qmul"; - Names[RTLIB::DIV_F32] = "__divsf3"; - Names[RTLIB::DIV_F64] = "__divdf3"; - Names[RTLIB::DIV_F80] = "__divxf3"; - Names[RTLIB::DIV_F128] = "__divtf3"; - Names[RTLIB::DIV_PPCF128] = "__gcc_qdiv"; - Names[RTLIB::REM_F32] = "fmodf"; - Names[RTLIB::REM_F64] = "fmod"; - Names[RTLIB::REM_F80] = "fmodl"; - Names[RTLIB::REM_F128] = "fmodl"; - Names[RTLIB::REM_PPCF128] = "fmodl"; - Names[RTLIB::FMA_F32] = "fmaf"; - Names[RTLIB::FMA_F64] = "fma"; - Names[RTLIB::FMA_F80] = "fmal"; - Names[RTLIB::FMA_F128] = "fmal"; - Names[RTLIB::FMA_PPCF128] = "fmal"; - Names[RTLIB::POWI_F32] = "__powisf2"; - Names[RTLIB::POWI_F64] = "__powidf2"; - Names[RTLIB::POWI_F80] = "__powixf2"; - Names[RTLIB::POWI_F128] = "__powitf2"; - Names[RTLIB::POWI_PPCF128] = "__powitf2"; - Names[RTLIB::SQRT_F32] = "sqrtf"; - Names[RTLIB::SQRT_F64] = "sqrt"; - Names[RTLIB::SQRT_F80] = "sqrtl"; - Names[RTLIB::SQRT_F128] = "sqrtl"; - Names[RTLIB::SQRT_PPCF128] = "sqrtl"; - Names[RTLIB::LOG_F32] = "logf"; - Names[RTLIB::LOG_F64] = "log"; - Names[RTLIB::LOG_F80] = "logl"; - Names[RTLIB::LOG_F128] = "logl"; - Names[RTLIB::LOG_PPCF128] = "logl"; - Names[RTLIB::LOG2_F32] = "log2f"; - Names[RTLIB::LOG2_F64] = "log2"; - Names[RTLIB::LOG2_F80] = "log2l"; - Names[RTLIB::LOG2_F128] = "log2l"; - Names[RTLIB::LOG2_PPCF128] = "log2l"; - Names[RTLIB::LOG10_F32] = "log10f"; - Names[RTLIB::LOG10_F64] = "log10"; - Names[RTLIB::LOG10_F80] = "log10l"; - Names[RTLIB::LOG10_F128] = "log10l"; - Names[RTLIB::LOG10_PPCF128] = "log10l"; - Names[RTLIB::EXP_F32] = "expf"; - Names[RTLIB::EXP_F64] = "exp"; - Names[RTLIB::EXP_F80] = "expl"; - Names[RTLIB::EXP_F128] = "expl"; - Names[RTLIB::EXP_PPCF128] = "expl"; - 
Names[RTLIB::EXP2_F32] = "exp2f"; - Names[RTLIB::EXP2_F64] = "exp2"; - Names[RTLIB::EXP2_F80] = "exp2l"; - Names[RTLIB::EXP2_F128] = "exp2l"; - Names[RTLIB::EXP2_PPCF128] = "exp2l"; - Names[RTLIB::SIN_F32] = "sinf"; - Names[RTLIB::SIN_F64] = "sin"; - Names[RTLIB::SIN_F80] = "sinl"; - Names[RTLIB::SIN_F128] = "sinl"; - Names[RTLIB::SIN_PPCF128] = "sinl"; - Names[RTLIB::COS_F32] = "cosf"; - Names[RTLIB::COS_F64] = "cos"; - Names[RTLIB::COS_F80] = "cosl"; - Names[RTLIB::COS_F128] = "cosl"; - Names[RTLIB::COS_PPCF128] = "cosl"; - Names[RTLIB::POW_F32] = "powf"; - Names[RTLIB::POW_F64] = "pow"; - Names[RTLIB::POW_F80] = "powl"; - Names[RTLIB::POW_F128] = "powl"; - Names[RTLIB::POW_PPCF128] = "powl"; - Names[RTLIB::CEIL_F32] = "ceilf"; - Names[RTLIB::CEIL_F64] = "ceil"; - Names[RTLIB::CEIL_F80] = "ceill"; - Names[RTLIB::CEIL_F128] = "ceill"; - Names[RTLIB::CEIL_PPCF128] = "ceill"; - Names[RTLIB::TRUNC_F32] = "truncf"; - Names[RTLIB::TRUNC_F64] = "trunc"; - Names[RTLIB::TRUNC_F80] = "truncl"; - Names[RTLIB::TRUNC_F128] = "truncl"; - Names[RTLIB::TRUNC_PPCF128] = "truncl"; - Names[RTLIB::RINT_F32] = "rintf"; - Names[RTLIB::RINT_F64] = "rint"; - Names[RTLIB::RINT_F80] = "rintl"; - Names[RTLIB::RINT_F128] = "rintl"; - Names[RTLIB::RINT_PPCF128] = "rintl"; - Names[RTLIB::NEARBYINT_F32] = "nearbyintf"; - Names[RTLIB::NEARBYINT_F64] = "nearbyint"; - Names[RTLIB::NEARBYINT_F80] = "nearbyintl"; - Names[RTLIB::NEARBYINT_F128] = "nearbyintl"; - Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl"; - Names[RTLIB::ROUND_F32] = "roundf"; - Names[RTLIB::ROUND_F64] = "round"; - Names[RTLIB::ROUND_F80] = "roundl"; - Names[RTLIB::ROUND_F128] = "roundl"; - Names[RTLIB::ROUND_PPCF128] = "roundl"; - Names[RTLIB::FLOOR_F32] = "floorf"; - Names[RTLIB::FLOOR_F64] = "floor"; - Names[RTLIB::FLOOR_F80] = "floorl"; - Names[RTLIB::FLOOR_F128] = "floorl"; - Names[RTLIB::FLOOR_PPCF128] = "floorl"; - Names[RTLIB::FMIN_F32] = "fminf"; - Names[RTLIB::FMIN_F64] = "fmin"; - Names[RTLIB::FMIN_F80] = "fminl"; - 
Names[RTLIB::FMIN_F128] = "fminl"; - Names[RTLIB::FMIN_PPCF128] = "fminl"; - Names[RTLIB::FMAX_F32] = "fmaxf"; - Names[RTLIB::FMAX_F64] = "fmax"; - Names[RTLIB::FMAX_F80] = "fmaxl"; - Names[RTLIB::FMAX_F128] = "fmaxl"; - Names[RTLIB::FMAX_PPCF128] = "fmaxl"; - Names[RTLIB::ROUND_F32] = "roundf"; - Names[RTLIB::ROUND_F64] = "round"; - Names[RTLIB::ROUND_F80] = "roundl"; - Names[RTLIB::ROUND_F128] = "roundl"; - Names[RTLIB::ROUND_PPCF128] = "roundl"; - Names[RTLIB::COPYSIGN_F32] = "copysignf"; - Names[RTLIB::COPYSIGN_F64] = "copysign"; - Names[RTLIB::COPYSIGN_F80] = "copysignl"; - Names[RTLIB::COPYSIGN_F128] = "copysignl"; - Names[RTLIB::COPYSIGN_PPCF128] = "copysignl"; - Names[RTLIB::FPEXT_F32_PPCF128] = "__gcc_stoq"; - Names[RTLIB::FPEXT_F64_PPCF128] = "__gcc_dtoq"; - Names[RTLIB::FPEXT_F64_F128] = "__extenddftf2"; - Names[RTLIB::FPEXT_F32_F128] = "__extendsftf2"; - Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2"; +#define HANDLE_LIBCALL(code, name) \ + Names[RTLIB::code] = name; +#include "llvm/CodeGen/RuntimeLibcalls.def" +#undef HANDLE_LIBCALL + + // A few names are different on particular architectures or environments. if (TT.isOSDarwin()) { // For f16/f32 conversions, Darwin uses the standard naming scheme, instead // of the gnueabi-style __gnu_*_ieee. 
@@ -268,264 +118,8 @@ static void InitLibcallNames(const char **Names, const Triple &TT) { Names[RTLIB::FPEXT_F16_F32] = "__gnu_h2f_ieee"; Names[RTLIB::FPROUND_F32_F16] = "__gnu_f2h_ieee"; } - Names[RTLIB::FPROUND_F64_F16] = "__truncdfhf2"; - Names[RTLIB::FPROUND_F80_F16] = "__truncxfhf2"; - Names[RTLIB::FPROUND_F128_F16] = "__trunctfhf2"; - Names[RTLIB::FPROUND_PPCF128_F16] = "__trunctfhf2"; - Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2"; - Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2"; - Names[RTLIB::FPROUND_F128_F32] = "__trunctfsf2"; - Names[RTLIB::FPROUND_PPCF128_F32] = "__gcc_qtos"; - Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2"; - Names[RTLIB::FPROUND_F128_F64] = "__trunctfdf2"; - Names[RTLIB::FPROUND_PPCF128_F64] = "__gcc_qtod"; - Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi"; - Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi"; - Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti"; - Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi"; - Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi"; - Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti"; - Names[RTLIB::FPTOSINT_F80_I32] = "__fixxfsi"; - Names[RTLIB::FPTOSINT_F80_I64] = "__fixxfdi"; - Names[RTLIB::FPTOSINT_F80_I128] = "__fixxfti"; - Names[RTLIB::FPTOSINT_F128_I32] = "__fixtfsi"; - Names[RTLIB::FPTOSINT_F128_I64] = "__fixtfdi"; - Names[RTLIB::FPTOSINT_F128_I128] = "__fixtfti"; - Names[RTLIB::FPTOSINT_PPCF128_I32] = "__gcc_qtou"; - Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi"; - Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti"; - Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi"; - Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi"; - Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti"; - Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi"; - Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi"; - Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti"; - Names[RTLIB::FPTOUINT_F80_I32] = "__fixunsxfsi"; - Names[RTLIB::FPTOUINT_F80_I64] = "__fixunsxfdi"; - Names[RTLIB::FPTOUINT_F80_I128] = "__fixunsxfti"; - Names[RTLIB::FPTOUINT_F128_I32] = 
"__fixunstfsi"; - Names[RTLIB::FPTOUINT_F128_I64] = "__fixunstfdi"; - Names[RTLIB::FPTOUINT_F128_I128] = "__fixunstfti"; - Names[RTLIB::FPTOUINT_PPCF128_I32] = "__fixunstfsi"; - Names[RTLIB::FPTOUINT_PPCF128_I64] = "__fixunstfdi"; - Names[RTLIB::FPTOUINT_PPCF128_I128] = "__fixunstfti"; - Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf"; - Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf"; - Names[RTLIB::SINTTOFP_I32_F80] = "__floatsixf"; - Names[RTLIB::SINTTOFP_I32_F128] = "__floatsitf"; - Names[RTLIB::SINTTOFP_I32_PPCF128] = "__gcc_itoq"; - Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf"; - Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf"; - Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf"; - Names[RTLIB::SINTTOFP_I64_F128] = "__floatditf"; - Names[RTLIB::SINTTOFP_I64_PPCF128] = "__floatditf"; - Names[RTLIB::SINTTOFP_I128_F32] = "__floattisf"; - Names[RTLIB::SINTTOFP_I128_F64] = "__floattidf"; - Names[RTLIB::SINTTOFP_I128_F80] = "__floattixf"; - Names[RTLIB::SINTTOFP_I128_F128] = "__floattitf"; - Names[RTLIB::SINTTOFP_I128_PPCF128] = "__floattitf"; - Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf"; - Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf"; - Names[RTLIB::UINTTOFP_I32_F80] = "__floatunsixf"; - Names[RTLIB::UINTTOFP_I32_F128] = "__floatunsitf"; - Names[RTLIB::UINTTOFP_I32_PPCF128] = "__gcc_utoq"; - Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf"; - Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf"; - Names[RTLIB::UINTTOFP_I64_F80] = "__floatundixf"; - Names[RTLIB::UINTTOFP_I64_F128] = "__floatunditf"; - Names[RTLIB::UINTTOFP_I64_PPCF128] = "__floatunditf"; - Names[RTLIB::UINTTOFP_I128_F32] = "__floatuntisf"; - Names[RTLIB::UINTTOFP_I128_F64] = "__floatuntidf"; - Names[RTLIB::UINTTOFP_I128_F80] = "__floatuntixf"; - Names[RTLIB::UINTTOFP_I128_F128] = "__floatuntitf"; - Names[RTLIB::UINTTOFP_I128_PPCF128] = "__floatuntitf"; - Names[RTLIB::OEQ_F32] = "__eqsf2"; - Names[RTLIB::OEQ_F64] = "__eqdf2"; - Names[RTLIB::OEQ_F128] = "__eqtf2"; - Names[RTLIB::OEQ_PPCF128] = 
"__gcc_qeq"; - Names[RTLIB::UNE_F32] = "__nesf2"; - Names[RTLIB::UNE_F64] = "__nedf2"; - Names[RTLIB::UNE_F128] = "__netf2"; - Names[RTLIB::UNE_PPCF128] = "__gcc_qne"; - Names[RTLIB::OGE_F32] = "__gesf2"; - Names[RTLIB::OGE_F64] = "__gedf2"; - Names[RTLIB::OGE_F128] = "__getf2"; - Names[RTLIB::OGE_PPCF128] = "__gcc_qge"; - Names[RTLIB::OLT_F32] = "__ltsf2"; - Names[RTLIB::OLT_F64] = "__ltdf2"; - Names[RTLIB::OLT_F128] = "__lttf2"; - Names[RTLIB::OLT_PPCF128] = "__gcc_qlt"; - Names[RTLIB::OLE_F32] = "__lesf2"; - Names[RTLIB::OLE_F64] = "__ledf2"; - Names[RTLIB::OLE_F128] = "__letf2"; - Names[RTLIB::OLE_PPCF128] = "__gcc_qle"; - Names[RTLIB::OGT_F32] = "__gtsf2"; - Names[RTLIB::OGT_F64] = "__gtdf2"; - Names[RTLIB::OGT_F128] = "__gttf2"; - Names[RTLIB::OGT_PPCF128] = "__gcc_qgt"; - Names[RTLIB::UO_F32] = "__unordsf2"; - Names[RTLIB::UO_F64] = "__unorddf2"; - Names[RTLIB::UO_F128] = "__unordtf2"; - Names[RTLIB::UO_PPCF128] = "__gcc_qunord"; - Names[RTLIB::O_F32] = "__unordsf2"; - Names[RTLIB::O_F64] = "__unorddf2"; - Names[RTLIB::O_F128] = "__unordtf2"; - Names[RTLIB::O_PPCF128] = "__gcc_qunord"; - Names[RTLIB::MEMCPY] = "memcpy"; - Names[RTLIB::MEMMOVE] = "memmove"; - Names[RTLIB::MEMSET] = "memset"; - Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_1] = - "__llvm_memcpy_element_unordered_atomic_1"; - Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_2] = - "__llvm_memcpy_element_unordered_atomic_2"; - Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_4] = - "__llvm_memcpy_element_unordered_atomic_4"; - Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_8] = - "__llvm_memcpy_element_unordered_atomic_8"; - Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_16] = - "__llvm_memcpy_element_unordered_atomic_16"; - Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1] = - "__llvm_memmove_element_unordered_atomic_1"; - Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2] = - "__llvm_memmove_element_unordered_atomic_2"; - Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4] = - 
"__llvm_memmove_element_unordered_atomic_4"; - Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8] = - "__llvm_memmove_element_unordered_atomic_8"; - Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16] = - "__llvm_memmove_element_unordered_atomic_16"; - Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_1] = - "__llvm_memset_element_unordered_atomic_1"; - Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_2] = - "__llvm_memset_element_unordered_atomic_2"; - Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_4] = - "__llvm_memset_element_unordered_atomic_4"; - Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_8] = - "__llvm_memset_element_unordered_atomic_8"; - Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_16] = - "__llvm_memset_element_unordered_atomic_16"; - Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume"; - Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1"; - Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2"; - Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4] = "__sync_val_compare_and_swap_4"; - Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8] = "__sync_val_compare_and_swap_8"; - Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16] = "__sync_val_compare_and_swap_16"; - Names[RTLIB::SYNC_LOCK_TEST_AND_SET_1] = "__sync_lock_test_and_set_1"; - Names[RTLIB::SYNC_LOCK_TEST_AND_SET_2] = "__sync_lock_test_and_set_2"; - Names[RTLIB::SYNC_LOCK_TEST_AND_SET_4] = "__sync_lock_test_and_set_4"; - Names[RTLIB::SYNC_LOCK_TEST_AND_SET_8] = "__sync_lock_test_and_set_8"; - Names[RTLIB::SYNC_LOCK_TEST_AND_SET_16] = "__sync_lock_test_and_set_16"; - Names[RTLIB::SYNC_FETCH_AND_ADD_1] = "__sync_fetch_and_add_1"; - Names[RTLIB::SYNC_FETCH_AND_ADD_2] = "__sync_fetch_and_add_2"; - Names[RTLIB::SYNC_FETCH_AND_ADD_4] = "__sync_fetch_and_add_4"; - Names[RTLIB::SYNC_FETCH_AND_ADD_8] = "__sync_fetch_and_add_8"; - Names[RTLIB::SYNC_FETCH_AND_ADD_16] = "__sync_fetch_and_add_16"; - Names[RTLIB::SYNC_FETCH_AND_SUB_1] = "__sync_fetch_and_sub_1"; - Names[RTLIB::SYNC_FETCH_AND_SUB_2] = 
"__sync_fetch_and_sub_2"; - Names[RTLIB::SYNC_FETCH_AND_SUB_4] = "__sync_fetch_and_sub_4"; - Names[RTLIB::SYNC_FETCH_AND_SUB_8] = "__sync_fetch_and_sub_8"; - Names[RTLIB::SYNC_FETCH_AND_SUB_16] = "__sync_fetch_and_sub_16"; - Names[RTLIB::SYNC_FETCH_AND_AND_1] = "__sync_fetch_and_and_1"; - Names[RTLIB::SYNC_FETCH_AND_AND_2] = "__sync_fetch_and_and_2"; - Names[RTLIB::SYNC_FETCH_AND_AND_4] = "__sync_fetch_and_and_4"; - Names[RTLIB::SYNC_FETCH_AND_AND_8] = "__sync_fetch_and_and_8"; - Names[RTLIB::SYNC_FETCH_AND_AND_16] = "__sync_fetch_and_and_16"; - Names[RTLIB::SYNC_FETCH_AND_OR_1] = "__sync_fetch_and_or_1"; - Names[RTLIB::SYNC_FETCH_AND_OR_2] = "__sync_fetch_and_or_2"; - Names[RTLIB::SYNC_FETCH_AND_OR_4] = "__sync_fetch_and_or_4"; - Names[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8"; - Names[RTLIB::SYNC_FETCH_AND_OR_16] = "__sync_fetch_and_or_16"; - Names[RTLIB::SYNC_FETCH_AND_XOR_1] = "__sync_fetch_and_xor_1"; - Names[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2"; - Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and_xor_4"; - Names[RTLIB::SYNC_FETCH_AND_XOR_8] = "__sync_fetch_and_xor_8"; - Names[RTLIB::SYNC_FETCH_AND_XOR_16] = "__sync_fetch_and_xor_16"; - Names[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1"; - Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2"; - Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4"; - Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8"; - Names[RTLIB::SYNC_FETCH_AND_NAND_16] = "__sync_fetch_and_nand_16"; - Names[RTLIB::SYNC_FETCH_AND_MAX_1] = "__sync_fetch_and_max_1"; - Names[RTLIB::SYNC_FETCH_AND_MAX_2] = "__sync_fetch_and_max_2"; - Names[RTLIB::SYNC_FETCH_AND_MAX_4] = "__sync_fetch_and_max_4"; - Names[RTLIB::SYNC_FETCH_AND_MAX_8] = "__sync_fetch_and_max_8"; - Names[RTLIB::SYNC_FETCH_AND_MAX_16] = "__sync_fetch_and_max_16"; - Names[RTLIB::SYNC_FETCH_AND_UMAX_1] = "__sync_fetch_and_umax_1"; - Names[RTLIB::SYNC_FETCH_AND_UMAX_2] = "__sync_fetch_and_umax_2"; - 
Names[RTLIB::SYNC_FETCH_AND_UMAX_4] = "__sync_fetch_and_umax_4"; - Names[RTLIB::SYNC_FETCH_AND_UMAX_8] = "__sync_fetch_and_umax_8"; - Names[RTLIB::SYNC_FETCH_AND_UMAX_16] = "__sync_fetch_and_umax_16"; - Names[RTLIB::SYNC_FETCH_AND_MIN_1] = "__sync_fetch_and_min_1"; - Names[RTLIB::SYNC_FETCH_AND_MIN_2] = "__sync_fetch_and_min_2"; - Names[RTLIB::SYNC_FETCH_AND_MIN_4] = "__sync_fetch_and_min_4"; - Names[RTLIB::SYNC_FETCH_AND_MIN_8] = "__sync_fetch_and_min_8"; - Names[RTLIB::SYNC_FETCH_AND_MIN_16] = "__sync_fetch_and_min_16"; - Names[RTLIB::SYNC_FETCH_AND_UMIN_1] = "__sync_fetch_and_umin_1"; - Names[RTLIB::SYNC_FETCH_AND_UMIN_2] = "__sync_fetch_and_umin_2"; - Names[RTLIB::SYNC_FETCH_AND_UMIN_4] = "__sync_fetch_and_umin_4"; - Names[RTLIB::SYNC_FETCH_AND_UMIN_8] = "__sync_fetch_and_umin_8"; - Names[RTLIB::SYNC_FETCH_AND_UMIN_16] = "__sync_fetch_and_umin_16"; - - Names[RTLIB::ATOMIC_LOAD] = "__atomic_load"; - Names[RTLIB::ATOMIC_LOAD_1] = "__atomic_load_1"; - Names[RTLIB::ATOMIC_LOAD_2] = "__atomic_load_2"; - Names[RTLIB::ATOMIC_LOAD_4] = "__atomic_load_4"; - Names[RTLIB::ATOMIC_LOAD_8] = "__atomic_load_8"; - Names[RTLIB::ATOMIC_LOAD_16] = "__atomic_load_16"; - - Names[RTLIB::ATOMIC_STORE] = "__atomic_store"; - Names[RTLIB::ATOMIC_STORE_1] = "__atomic_store_1"; - Names[RTLIB::ATOMIC_STORE_2] = "__atomic_store_2"; - Names[RTLIB::ATOMIC_STORE_4] = "__atomic_store_4"; - Names[RTLIB::ATOMIC_STORE_8] = "__atomic_store_8"; - Names[RTLIB::ATOMIC_STORE_16] = "__atomic_store_16"; - - Names[RTLIB::ATOMIC_EXCHANGE] = "__atomic_exchange"; - Names[RTLIB::ATOMIC_EXCHANGE_1] = "__atomic_exchange_1"; - Names[RTLIB::ATOMIC_EXCHANGE_2] = "__atomic_exchange_2"; - Names[RTLIB::ATOMIC_EXCHANGE_4] = "__atomic_exchange_4"; - Names[RTLIB::ATOMIC_EXCHANGE_8] = "__atomic_exchange_8"; - Names[RTLIB::ATOMIC_EXCHANGE_16] = "__atomic_exchange_16"; - - Names[RTLIB::ATOMIC_COMPARE_EXCHANGE] = "__atomic_compare_exchange"; - Names[RTLIB::ATOMIC_COMPARE_EXCHANGE_1] = "__atomic_compare_exchange_1"; - 
Names[RTLIB::ATOMIC_COMPARE_EXCHANGE_2] = "__atomic_compare_exchange_2"; - Names[RTLIB::ATOMIC_COMPARE_EXCHANGE_4] = "__atomic_compare_exchange_4"; - Names[RTLIB::ATOMIC_COMPARE_EXCHANGE_8] = "__atomic_compare_exchange_8"; - Names[RTLIB::ATOMIC_COMPARE_EXCHANGE_16] = "__atomic_compare_exchange_16"; - - Names[RTLIB::ATOMIC_FETCH_ADD_1] = "__atomic_fetch_add_1"; - Names[RTLIB::ATOMIC_FETCH_ADD_2] = "__atomic_fetch_add_2"; - Names[RTLIB::ATOMIC_FETCH_ADD_4] = "__atomic_fetch_add_4"; - Names[RTLIB::ATOMIC_FETCH_ADD_8] = "__atomic_fetch_add_8"; - Names[RTLIB::ATOMIC_FETCH_ADD_16] = "__atomic_fetch_add_16"; - Names[RTLIB::ATOMIC_FETCH_SUB_1] = "__atomic_fetch_sub_1"; - Names[RTLIB::ATOMIC_FETCH_SUB_2] = "__atomic_fetch_sub_2"; - Names[RTLIB::ATOMIC_FETCH_SUB_4] = "__atomic_fetch_sub_4"; - Names[RTLIB::ATOMIC_FETCH_SUB_8] = "__atomic_fetch_sub_8"; - Names[RTLIB::ATOMIC_FETCH_SUB_16] = "__atomic_fetch_sub_16"; - Names[RTLIB::ATOMIC_FETCH_AND_1] = "__atomic_fetch_and_1"; - Names[RTLIB::ATOMIC_FETCH_AND_2] = "__atomic_fetch_and_2"; - Names[RTLIB::ATOMIC_FETCH_AND_4] = "__atomic_fetch_and_4"; - Names[RTLIB::ATOMIC_FETCH_AND_8] = "__atomic_fetch_and_8"; - Names[RTLIB::ATOMIC_FETCH_AND_16] = "__atomic_fetch_and_16"; - Names[RTLIB::ATOMIC_FETCH_OR_1] = "__atomic_fetch_or_1"; - Names[RTLIB::ATOMIC_FETCH_OR_2] = "__atomic_fetch_or_2"; - Names[RTLIB::ATOMIC_FETCH_OR_4] = "__atomic_fetch_or_4"; - Names[RTLIB::ATOMIC_FETCH_OR_8] = "__atomic_fetch_or_8"; - Names[RTLIB::ATOMIC_FETCH_OR_16] = "__atomic_fetch_or_16"; - Names[RTLIB::ATOMIC_FETCH_XOR_1] = "__atomic_fetch_xor_1"; - Names[RTLIB::ATOMIC_FETCH_XOR_2] = "__atomic_fetch_xor_2"; - Names[RTLIB::ATOMIC_FETCH_XOR_4] = "__atomic_fetch_xor_4"; - Names[RTLIB::ATOMIC_FETCH_XOR_8] = "__atomic_fetch_xor_8"; - Names[RTLIB::ATOMIC_FETCH_XOR_16] = "__atomic_fetch_xor_16"; - Names[RTLIB::ATOMIC_FETCH_NAND_1] = "__atomic_fetch_nand_1"; - Names[RTLIB::ATOMIC_FETCH_NAND_2] = "__atomic_fetch_nand_2"; - Names[RTLIB::ATOMIC_FETCH_NAND_4] = 
"__atomic_fetch_nand_4"; - Names[RTLIB::ATOMIC_FETCH_NAND_8] = "__atomic_fetch_nand_8"; - Names[RTLIB::ATOMIC_FETCH_NAND_16] = "__atomic_fetch_nand_16"; - - if (TT.isGNUEnvironment()) { + + if (TT.isGNUEnvironment() || TT.isOSFuchsia()) { Names[RTLIB::SINCOS_F32] = "sincosf"; Names[RTLIB::SINCOS_F64] = "sincos"; Names[RTLIB::SINCOS_F80] = "sincosl"; @@ -533,11 +127,9 @@ static void InitLibcallNames(const char **Names, const Triple &TT) { Names[RTLIB::SINCOS_PPCF128] = "sincosl"; } - if (!TT.isOSOpenBSD()) { - Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = "__stack_chk_fail"; + if (TT.isOSOpenBSD()) { + Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = nullptr; } - - Names[RTLIB::DEOPTIMIZE] = "__llvm_deoptimize"; } /// Set default libcall CallingConvs. @@ -858,7 +450,6 @@ RTLIB::Libcall RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) { } /// InitCmpLibcallCCs - Set default comparison libcall CC. -/// static void InitCmpLibcallCCs(ISD::CondCode *CCs) { memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL); CCs[RTLIB::OEQ_F32] = ISD::SETEQ; @@ -929,6 +520,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { MaxAtomicSizeInBitsSupported = 1024; MinCmpXchgSizeInBits = 0; + SupportsUnalignedAtomics = false; std::fill(std::begin(LibcallRoutineNames), std::end(LibcallRoutineNames), nullptr); @@ -1044,7 +636,6 @@ void TargetLoweringBase::initActions() { // On most systems, DEBUGTRAP and TRAP have no difference. The "Expand" // here is to inform DAG Legalizer to replace DEBUGTRAP with TRAP. - // setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand); } @@ -1156,7 +747,7 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const { // found, fallback to the usual mechanism of widening/splitting the // vector. EVT OldEltVT = EltVT; - while (1) { + while (true) { // Increase the bitwidth of the element to the next pow-of-two // (which is greater than 8 bits). 
EltVT = EVT::getIntegerVT(Context, 1 + EltVT.getSizeInBits()) @@ -1184,7 +775,7 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const { // Try to widen the vector until a legal type is found. // If there is no wider legal type, split the vector. - while (1) { + while (true) { // Round up to the next power of 2. NumElts = (unsigned)NextPowerOf2(NumElts); @@ -1276,7 +867,7 @@ MachineBasicBlock * TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI, MachineBasicBlock *MBB) const { MachineInstr *MI = &InitialMI; - MachineFunction &MF = *MI->getParent()->getParent(); + MachineFunction &MF = *MI->getMF(); MachineFrameInfo &MFI = MF.getFrameInfo(); // We're handling multiple types of operands here: @@ -1495,7 +1086,7 @@ void TargetLoweringBase::computeRegisterProperties( bool IsLegalWiderType = false; LegalizeTypeAction PreferredAction = getPreferredVectorAction(VT); switch (PreferredAction) { - case TypePromoteInteger: { + case TypePromoteInteger: // Try to promote the elements of integer vectors. If no legal // promotion was found, fall through to the widen-vector method. for (unsigned nVT = i + 1; nVT <= MVT::LAST_INTEGER_VECTOR_VALUETYPE; ++nVT) { @@ -1515,8 +1106,8 @@ void TargetLoweringBase::computeRegisterProperties( if (IsLegalWiderType) break; LLVM_FALLTHROUGH; - } - case TypeWidenVector: { + + case TypeWidenVector: // Try to widen the vector. for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { MVT SVT = (MVT::SimpleValueType) nVT; @@ -1533,7 +1124,7 @@ void TargetLoweringBase::computeRegisterProperties( if (IsLegalWiderType) break; LLVM_FALLTHROUGH; - } + case TypeSplitVector: case TypeScalarizeVector: { MVT IntermediateVT; @@ -1598,7 +1189,6 @@ MVT::SimpleValueType TargetLoweringBase::getCmpLibcallReturnType() const { /// This method returns the number of registers needed, and the VT for each /// register. It also returns the VT and quantity of the intermediate values /// before they are promoted/expanded. 
-/// unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, @@ -1911,7 +1501,7 @@ Value *TargetLoweringBase::getSafeStackPointerLocation(IRBuilder<> &IRB) const { /// by AM is legal for this target, for a load/store of the specified type. bool TargetLoweringBase::isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, - unsigned AS) const { + unsigned AS, Instruction *I) const { // The default implementation of this implements a conservative RISCy, r+r and // r+i addr mode. @@ -2002,8 +1592,8 @@ void TargetLoweringBase::setMaximumJumpTableSize(unsigned Val) { /// Get the reciprocal estimate attribute string for a function that will /// override the target defaults. static StringRef getRecipEstimateForFunc(MachineFunction &MF) { - const Function *F = MF.getFunction(); - return F->getFnAttribute("reciprocal-estimates").getValueAsString(); + const Function &F = MF.getFunction(); + return F.getFnAttribute("reciprocal-estimates").getValueAsString(); } /// Construct a string for the given reciprocal operation of the given type. 
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 6922e33c8d6c..24d4baa31e1f 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -52,6 +52,7 @@ #include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" +#include "llvm/Support/Format.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" @@ -134,7 +135,7 @@ void TargetLoweringObjectFileELF::emitPersonalityValue( ELF::SHT_PROGBITS, Flags, 0); unsigned Size = DL.getPointerSize(); Streamer.SwitchSection(Sec); - Streamer.EmitValueToAlignment(DL.getPointerABIAlignment()); + Streamer.EmitValueToAlignment(DL.getPointerABIAlignment(0)); Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject); const MCExpr *E = MCConstantExpr::create(Size, getContext()); Streamer.emitELFSize(Label, E); @@ -168,8 +169,7 @@ const MCExpr *TargetLoweringObjectFileELF::getTTypeGlobalReference( MMI, Streamer); } -static SectionKind -getELFKindForNamedSection(StringRef Name, SectionKind K) { +static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) { // N.B.: The defaults used in here are no the same ones used in MC. // We follow gcc, MC follows gas. For example, given ".section .eh_frame", // both gas and MC will produce a section with no flags. 
Given @@ -531,10 +531,8 @@ static MCSectionELF *getStaticStructorSection(MCContext &Ctx, bool UseInitArray, Name = ".ctors"; else Name = ".dtors"; - if (Priority != 65535) { - Name += '.'; - Name += utostr(65535 - Priority); - } + if (Priority != 65535) + raw_string_ostream(Name) << format(".%05u", 65535 - Priority); Type = ELF::SHT_PROGBITS; } @@ -1213,16 +1211,38 @@ void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx, } } +static MCSectionCOFF *getCOFFStaticStructorSection(MCContext &Ctx, + const Triple &T, bool IsCtor, + unsigned Priority, + const MCSymbol *KeySym, + MCSectionCOFF *Default) { + if (T.isKnownWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) + return Ctx.getAssociativeCOFFSection(Default, KeySym, 0); + + std::string Name = IsCtor ? ".ctors" : ".dtors"; + if (Priority != 65535) + raw_string_ostream(Name) << format(".%05u", 65535 - Priority); + + return Ctx.getAssociativeCOFFSection( + Ctx.getCOFFSection(Name, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getData()), + KeySym, 0); +} + MCSection *TargetLoweringObjectFileCOFF::getStaticCtorSection( unsigned Priority, const MCSymbol *KeySym) const { - return getContext().getAssociativeCOFFSection( - cast<MCSectionCOFF>(StaticCtorSection), KeySym, 0); + return getCOFFStaticStructorSection(getContext(), getTargetTriple(), true, + Priority, KeySym, + cast<MCSectionCOFF>(StaticCtorSection)); } MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection( unsigned Priority, const MCSymbol *KeySym) const { - return getContext().getAssociativeCOFFSection( - cast<MCSectionCOFF>(StaticDtorSection), KeySym, 0); + return getCOFFStaticStructorSection(getContext(), getTargetTriple(), false, + Priority, KeySym, + cast<MCSectionCOFF>(StaticDtorSection)); } void TargetLoweringObjectFileCOFF::emitLinkerFlagsForGlobal( @@ -1234,32 +1254,40 @@ void TargetLoweringObjectFileCOFF::emitLinkerFlagsForGlobal( // Wasm 
//===----------------------------------------------------------------------===// -static const Comdat *getWasmComdat(const GlobalValue *GV) { +static void checkWasmComdat(const GlobalValue *GV) { const Comdat *C = GV->getComdat(); if (!C) - return nullptr; + return; - if (C->getSelectionKind() != Comdat::Any) - report_fatal_error("Wasm COMDATs only support SelectionKind::Any, '" + - C->getName() + "' cannot be lowered."); + // TODO(sbc): At some point we may need COMDAT support but currently + // they are not supported. + report_fatal_error("WebAssembly doesn't support COMDATs, '" + C->getName() + + "' cannot be lowered."); +} - return C; +static SectionKind getWasmKindForNamedSection(StringRef Name, SectionKind K) { + // If we're told we have function data, then use that. + if (K.isText()) + return SectionKind::getText(); + + // Otherwise, ignore whatever section type the generic impl detected and use + // a plain data section. + return SectionKind::getData(); } MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { - llvm_unreachable("getExplicitSectionGlobal not yet implemented"); - return nullptr; + StringRef Name = GO->getSection(); + checkWasmComdat(GO); + Kind = getWasmKindForNamedSection(Name, Kind); + return getContext().getWasmSection(Name, Kind); } -static MCSectionWasm * -selectWasmSectionForGlobal(MCContext &Ctx, const GlobalObject *GO, - SectionKind Kind, Mangler &Mang, - const TargetMachine &TM, bool EmitUniqueSection, - unsigned Flags, unsigned *NextUniqueID) { +static MCSectionWasm *selectWasmSectionForGlobal( + MCContext &Ctx, const GlobalObject *GO, SectionKind Kind, Mangler &Mang, + const TargetMachine &TM, bool EmitUniqueSection, unsigned *NextUniqueID) { StringRef Group = ""; - if (getWasmComdat(GO)) - llvm_unreachable("comdat not yet supported for wasm"); + checkWasmComdat(GO); bool UniqueSectionNames = TM.getUniqueSectionNames(); SmallString<128> Name = 
getSectionPrefixForGlobal(Kind); @@ -1279,8 +1307,7 @@ selectWasmSectionForGlobal(MCContext &Ctx, const GlobalObject *GO, UniqueID = *NextUniqueID; (*NextUniqueID)++; } - return Ctx.getWasmSection(Name, /*Type=*/0, Flags, - Group, UniqueID); + return Ctx.getWasmSection(Name, Kind, Group, UniqueID); } MCSection *TargetLoweringObjectFileWasm::SelectSectionForGlobal( @@ -1299,8 +1326,7 @@ MCSection *TargetLoweringObjectFileWasm::SelectSectionForGlobal( EmitUniqueSection |= GO->hasComdat(); return selectWasmSectionForGlobal(getContext(), GO, Kind, getMangler(), TM, - EmitUniqueSection, /*Flags=*/0, - &NextUniqueID); + EmitUniqueSection, &NextUniqueID); } bool TargetLoweringObjectFileWasm::shouldPutJumpTableInFunctionSection( @@ -1330,7 +1356,21 @@ const MCExpr *TargetLoweringObjectFileWasm::lowerRelativeReference( MCSymbolRefExpr::create(TM.getSymbol(RHS), getContext()), getContext()); } -void -TargetLoweringObjectFileWasm::InitializeWasm() { - // TODO: Initialize StaticCtorSection and StaticDtorSection. +void TargetLoweringObjectFileWasm::InitializeWasm() { + StaticCtorSection = + getContext().getWasmSection(".init_array", SectionKind::getData()); +} + +MCSection *TargetLoweringObjectFileWasm::getStaticCtorSection( + unsigned Priority, const MCSymbol *KeySym) const { + return Priority == UINT16_MAX ? + StaticCtorSection : + getContext().getWasmSection(".init_array." 
+ utostr(Priority), + SectionKind::getData()); +} + +MCSection *TargetLoweringObjectFileWasm::getStaticDtorSection( + unsigned Priority, const MCSymbol *KeySym) const { + llvm_unreachable("@llvm.global_dtors should have been lowered already"); + return nullptr; } diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp index ed845e1706f8..853e71d0efa5 100644 --- a/lib/CodeGen/TargetOptionsImpl.cpp +++ b/lib/CodeGen/TargetOptionsImpl.cpp @@ -13,11 +13,11 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" -#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; /// DisableFramePointerElim - This returns true if frame pointer elimination @@ -28,7 +28,7 @@ bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const { return true; // Check to see if we should eliminate non-leaf frame pointers. 
- if (MF.getFunction()->hasFnAttribute("no-frame-pointer-elim-non-leaf")) + if (MF.getFunction().hasFnAttribute("no-frame-pointer-elim-non-leaf")) return MF.getFrameInfo().hasCalls(); return false; diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp index 817e58ce59e1..121bed5a79cb 100644 --- a/lib/CodeGen/TargetPassConfig.cpp +++ b/lib/CodeGen/TargetPassConfig.cpp @@ -47,6 +47,9 @@ using namespace llvm; +cl::opt<bool> EnableIPRA("enable-ipra", cl::init(false), cl::Hidden, + cl::desc("Enable interprocedural register allocation " + "to reduce load/store at procedure calls.")); static cl::opt<bool> DisablePostRASched("disable-post-ra", cl::Hidden, cl::desc("Disable Post Regalloc Scheduler")); static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden, @@ -90,7 +93,11 @@ static cl::opt<bool> DisablePartialLibcallInlining("disable-partial-libcall-inli static cl::opt<bool> EnableImplicitNullChecks( "enable-implicit-null-checks", cl::desc("Fold null checks into faulting memory operations"), - cl::init(false)); + cl::init(false), cl::Hidden); +static cl::opt<bool> + EnableMergeICmps("enable-mergeicmps", + cl::desc("Merge ICmp chains into a single memcmp"), + cl::init(false), cl::Hidden); static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden, cl::desc("Print LLVM IR produced by the loop-reduce pass")); static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden, @@ -104,6 +111,11 @@ static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden, static cl::opt<bool> EnableMachineOutliner("enable-machine-outliner", cl::Hidden, cl::desc("Enable machine outliner")); +static cl::opt<bool> EnableLinkOnceODROutlining( + "enable-linkonceodr-outlining", + cl::Hidden, + cl::desc("Enable the machine outliner on linkonceodr functions"), + cl::init(false)); // Enable or disable FastISel. 
Both options are needed, because // FastISel is enabled by default with -fast, and we wish to be // able to enable or disable fast-isel independently from -O0. @@ -115,10 +127,9 @@ static cl::opt<cl::boolOrDefault> EnableGlobalISel("global-isel", cl::Hidden, cl::desc("Enable the \"global\" instruction selector")); -static cl::opt<std::string> -PrintMachineInstrs("print-machineinstrs", cl::ValueOptional, - cl::desc("Print machine instrs"), - cl::value_desc("pass-name"), cl::init("option-unspecified")); +static cl::opt<std::string> PrintMachineInstrs( + "print-machineinstrs", cl::ValueOptional, cl::desc("Print machine instrs"), + cl::value_desc("pass-name"), cl::init("option-unspecified"), cl::Hidden); static cl::opt<int> EnableGlobalISelAbort( "global-isel-abort", cl::Hidden, @@ -153,6 +164,34 @@ static cl::opt<CFLAAType> UseCFLAA( clEnumValN(CFLAAType::Both, "both", "Enable both variants of CFL-AA"))); +/// Option names for limiting the codegen pipeline. +/// Those are used in error reporting and we didn't want +/// to duplicate their names all over the place. 
+const char *StartAfterOptName = "start-after"; +const char *StartBeforeOptName = "start-before"; +const char *StopAfterOptName = "stop-after"; +const char *StopBeforeOptName = "stop-before"; + +static cl::opt<std::string> + StartAfterOpt(StringRef(StartAfterOptName), + cl::desc("Resume compilation after a specific pass"), + cl::value_desc("pass-name"), cl::init(""), cl::Hidden); + +static cl::opt<std::string> + StartBeforeOpt(StringRef(StartBeforeOptName), + cl::desc("Resume compilation before a specific pass"), + cl::value_desc("pass-name"), cl::init(""), cl::Hidden); + +static cl::opt<std::string> + StopAfterOpt(StringRef(StopAfterOptName), + cl::desc("Stop compilation after a specific pass"), + cl::value_desc("pass-name"), cl::init(""), cl::Hidden); + +static cl::opt<std::string> + StopBeforeOpt(StringRef(StopBeforeOptName), + cl::desc("Stop compilation before a specific pass"), + cl::value_desc("pass-name"), cl::init(""), cl::Hidden); + /// Allow standard passes to be disabled by command line options. This supports /// simple binary flags that either suppress the pass or do nothing. /// i.e. -disable-mypass=false has no effect. @@ -282,6 +321,37 @@ TargetPassConfig::~TargetPassConfig() { delete Impl; } +static const PassInfo *getPassInfo(StringRef PassName) { + if (PassName.empty()) + return nullptr; + + const PassRegistry &PR = *PassRegistry::getPassRegistry(); + const PassInfo *PI = PR.getPassInfo(PassName); + if (!PI) + report_fatal_error(Twine('\"') + Twine(PassName) + + Twine("\" pass is not registered.")); + return PI; +} + +static AnalysisID getPassIDFromName(StringRef PassName) { + const PassInfo *PI = getPassInfo(PassName); + return PI ? 
PI->getTypeInfo() : nullptr; +} + +void TargetPassConfig::setStartStopPasses() { + StartBefore = getPassIDFromName(StartBeforeOpt); + StartAfter = getPassIDFromName(StartAfterOpt); + StopBefore = getPassIDFromName(StopBeforeOpt); + StopAfter = getPassIDFromName(StopAfterOpt); + if (StartBefore && StartAfter) + report_fatal_error(Twine(StartBeforeOptName) + Twine(" and ") + + Twine(StartAfterOptName) + Twine(" specified!")); + if (StopBefore && StopAfter) + report_fatal_error(Twine(StopBeforeOptName) + Twine(" and ") + + Twine(StopAfterOptName) + Twine(" specified!")); + Started = (StartAfter == nullptr) && (StartBefore == nullptr); +} + // Out of line constructor provides default values for pass options and // registers all common codegen passes. TargetPassConfig::TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm) @@ -303,8 +373,17 @@ TargetPassConfig::TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm) if (StringRef(PrintMachineInstrs.getValue()).equals("")) TM.Options.PrintMachineCode = true; + if (EnableIPRA.getNumOccurrences()) + TM.Options.EnableIPRA = EnableIPRA; + else { + // If not explicitly specified, use target default. 
+ TM.Options.EnableIPRA = TM.useIPRA(); + } + if (TM.Options.EnableIPRA) setRequiresCodeGenSCCOrder(); + + setStartStopPasses(); } CodeGenOpt::Level TargetPassConfig::getOptLevel() const { @@ -339,6 +418,30 @@ TargetPassConfig::TargetPassConfig() "triple set?"); } +bool TargetPassConfig::hasLimitedCodeGenPipeline() const { + return StartBefore || StartAfter || StopBefore || StopAfter; +} + +std::string +TargetPassConfig::getLimitedCodeGenPipelineReason(const char *Separator) const { + if (!hasLimitedCodeGenPipeline()) + return std::string(); + std::string Res; + static cl::opt<std::string> *PassNames[] = {&StartAfterOpt, &StartBeforeOpt, + &StopAfterOpt, &StopBeforeOpt}; + static const char *OptNames[] = {StartAfterOptName, StartBeforeOptName, + StopAfterOptName, StopBeforeOptName}; + bool IsFirst = true; + for (int Idx = 0; Idx < 4; ++Idx) + if (!PassNames[Idx]->empty()) { + if (!IsFirst) + Res += Separator; + IsFirst = false; + Res += OptNames[Idx]; + } + return Res; +} + // Helper to verify the analysis is really immutable. void TargetPassConfig::setOpt(bool &Opt, bool Val) { assert(!Initialized && "PassConfig is immutable"); @@ -496,6 +599,16 @@ void TargetPassConfig::addIRPasses() { addPass(createPrintFunctionPass(dbgs(), "\n\n*** Code after LSR ***\n")); } + if (getOptLevel() != CodeGenOpt::None) { + // The MergeICmpsPass tries to create memcmp calls by grouping sequences of + // loads and compares. ExpandMemCmpPass then tries to expand those calls + // into optimally-sized loads and compares. The transforms are enabled by a + // target lowering hook. 
+ if (EnableMergeICmps) + addPass(createMergeICmpsPass()); + addPass(createExpandMemCmpPass()); + } + // Run GC lowering passes for builtin collectors // TODO: add a pass insertion point here addPass(createGCLoweringPass()); @@ -511,8 +624,8 @@ void TargetPassConfig::addIRPasses() { if (getOptLevel() != CodeGenOpt::None && !DisablePartialLibcallInlining) addPass(createPartiallyInlineLibCallsPass()); - // Insert calls to mcount-like functions. - addPass(createCountingFunctionInserterPass()); + // Instrument function entry and exit, e.g. with calls to mcount(). + addPass(createPostInlineEntryExitInstrumenterPass()); // Add scalarization of target's unsupported masked memory intrinsics pass. // the unsupported intrinsic will be replaced with a chain of basic blocks, @@ -653,10 +766,9 @@ bool TargetPassConfig::addISelPasses() { /// -regalloc=... command line option. static FunctionPass *useDefaultRegisterAllocator() { return nullptr; } static cl::opt<RegisterRegAlloc::FunctionPassCtor, false, - RegisterPassParser<RegisterRegAlloc> > -RegAlloc("regalloc", - cl::init(&useDefaultRegisterAllocator), - cl::desc("Register allocator to use")); + RegisterPassParser<RegisterRegAlloc>> + RegAlloc("regalloc", cl::Hidden, cl::init(&useDefaultRegisterAllocator), + cl::desc("Register allocator to use")); /// Add the complete set of target-independent postISel code generator passes. /// @@ -694,9 +806,6 @@ void TargetPassConfig::addMachinePasses() { // Print the instruction selected machine code... printAndVerify("After Instruction Selection"); - if (TM->Options.EnableIPRA) - addPass(createRegUsageInfoPropPass()); - // Expand pseudo-instructions emitted by ISel. addPass(&ExpandISelPseudosID); @@ -709,6 +818,9 @@ void TargetPassConfig::addMachinePasses() { addPass(&LocalStackSlotAllocationID, false); } + if (TM->Options.EnableIPRA) + addPass(createRegUsageInfoPropPass()); + // Run pre-ra passes. 
addPreRegAlloc(); @@ -788,7 +900,7 @@ void TargetPassConfig::addMachinePasses() { addPass(&PatchableFunctionID, false); if (EnableMachineOutliner) - PM->add(createMachineOutlinerPass()); + PM->add(createMachineOutlinerPass(EnableLinkOnceODROutlining)); AddingMachinePasses = false; } @@ -824,9 +936,6 @@ void TargetPassConfig::addMachineSSAOptimization() { addPass(&MachineLICMID, false); addPass(&MachineCSEID, false); - // Coalesce basic blocks with the same branch condition - addPass(&BranchCoalescingID); - addPass(&MachineSinkingID); addPass(&PeepholeOptimizerID); diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp index eeb00a784b0d..f03c3b8300f3 100644 --- a/lib/CodeGen/TargetRegisterInfo.cpp +++ b/lib/CodeGen/TargetRegisterInfo.cpp @@ -11,13 +11,17 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" @@ -27,9 +31,6 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/Printable.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <cassert> #include <utility> @@ -41,11 +42,14 @@ TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID, regclass_iterator RCB, regclass_iterator RCE, const char *const *SRINames, const LaneBitmask *SRILaneMasks, - LaneBitmask SRICoveringLanes) + LaneBitmask SRICoveringLanes, + 
const RegClassInfo *const RCIs, + unsigned Mode) : InfoDesc(ID), SubRegIndexNames(SRINames), SubRegIndexLaneMasks(SRILaneMasks), RegClassBegin(RCB), RegClassEnd(RCE), - CoveringLanes(SRICoveringLanes) { + CoveringLanes(SRICoveringLanes), + RCInfos(RCIs), HwMode(Mode) { } TargetRegisterInfo::~TargetRegisterInfo() = default; @@ -65,8 +69,8 @@ bool TargetRegisterInfo::checkAllSuperRegsMarked(const BitVector &RegisterSet, continue; for (MCSuperRegIterator SR(Reg, this); SR.isValid(); ++SR) { if (!RegisterSet[*SR] && !is_contained(Exceptions, Reg)) { - dbgs() << "Error: Super register " << PrintReg(*SR, this) - << " of reserved register " << PrintReg(Reg, this) + dbgs() << "Error: Super register " << printReg(*SR, this) + << " of reserved register " << printReg(Reg, this) << " is not reserved.\n"; return false; } @@ -81,7 +85,7 @@ bool TargetRegisterInfo::checkAllSuperRegsMarked(const BitVector &RegisterSet, namespace llvm { -Printable PrintReg(unsigned Reg, const TargetRegisterInfo *TRI, +Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI, unsigned SubIdx) { return Printable([Reg, TRI, SubIdx](raw_ostream &OS) { if (!Reg) @@ -89,11 +93,15 @@ Printable PrintReg(unsigned Reg, const TargetRegisterInfo *TRI, else if (TargetRegisterInfo::isStackSlot(Reg)) OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg); else if (TargetRegisterInfo::isVirtualRegister(Reg)) - OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Reg); - else if (TRI && Reg < TRI->getNumRegs()) - OS << '%' << TRI->getName(Reg); - else - OS << "%physreg" << Reg; + OS << '%' << TargetRegisterInfo::virtReg2Index(Reg); + else if (!TRI) + OS << '%' << "physreg" << Reg; + else if (Reg < TRI->getNumRegs()) { + OS << '%'; + printLowerCase(TRI->getName(Reg), OS); + } else + llvm_unreachable("Register kind is unsupported."); + if (SubIdx) { if (TRI) OS << ':' << TRI->getSubRegIndexName(SubIdx); @@ -103,7 +111,7 @@ Printable PrintReg(unsigned Reg, const TargetRegisterInfo *TRI, }); } -Printable 
PrintRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) { +Printable printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) { return Printable([Unit, TRI](raw_ostream &OS) { // Generic printout when TRI is missing. if (!TRI) { @@ -126,12 +134,27 @@ Printable PrintRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) { }); } -Printable PrintVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) { +Printable printVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) { return Printable([Unit, TRI](raw_ostream &OS) { if (TRI && TRI->isVirtualRegister(Unit)) { - OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Unit); + OS << '%' << TargetRegisterInfo::virtReg2Index(Unit); } else { - OS << PrintRegUnit(Unit, TRI); + OS << printRegUnit(Unit, TRI); + } + }); +} + +Printable printRegClassOrBank(unsigned Reg, const MachineRegisterInfo &RegInfo, + const TargetRegisterInfo *TRI) { + return Printable([Reg, &RegInfo, TRI](raw_ostream &OS) { + if (RegInfo.getRegClassOrNull(Reg)) + OS << StringRef(TRI->getRegClassName(RegInfo.getRegClass(Reg))).lower(); + else if (RegInfo.getRegBankOrNull(Reg)) + OS << StringRef(RegInfo.getRegBankOrNull(Reg)->getName()).lower(); + else { + OS << "_"; + assert((RegInfo.def_empty(Reg) || RegInfo.getType(Reg).isValid()) && + "Generic registers must have a valid type"); } }); } @@ -357,7 +380,7 @@ bool TargetRegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC, } // Compute target-independent register allocator hints to help eliminate copies. -void +bool TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg, ArrayRef<MCPhysReg> Order, SmallVectorImpl<MCPhysReg> &Hints, @@ -365,49 +388,55 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg, const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const { const MachineRegisterInfo &MRI = MF.getRegInfo(); - std::pair<unsigned, unsigned> Hint = MRI.getRegAllocationHint(VirtReg); - - // Hints with HintType != 0 were set by target-dependent code. 
- // Such targets must provide their own implementation of - // TRI::getRegAllocationHints to interpret those hint types. - assert(Hint.first == 0 && "Target must implement TRI::getRegAllocationHints"); - - // Target-independent hints are either a physical or a virtual register. - unsigned Phys = Hint.second; - if (VRM && isVirtualRegister(Phys)) - Phys = VRM->getPhys(Phys); - - // Check that Phys is a valid hint in VirtReg's register class. - if (!isPhysicalRegister(Phys)) - return; - if (MRI.isReserved(Phys)) - return; - // Check that Phys is in the allocation order. We shouldn't heed hints - // from VirtReg's register class if they aren't in the allocation order. The - // target probably has a reason for removing the register. - if (!is_contained(Order, Phys)) - return; - - // All clear, tell the register allocator to prefer this register. - Hints.push_back(Phys); + const std::pair<unsigned, SmallVector<unsigned, 4>> &Hints_MRI = + MRI.getRegAllocationHints(VirtReg); + + // First hint may be a target hint. + bool Skip = (Hints_MRI.first != 0); + for (auto Reg : Hints_MRI.second) { + if (Skip) { + Skip = false; + continue; + } + + // Target-independent hints are either a physical or a virtual register. + unsigned Phys = Reg; + if (VRM && isVirtualRegister(Phys)) + Phys = VRM->getPhys(Phys); + + // Check that Phys is a valid hint in VirtReg's register class. + if (!isPhysicalRegister(Phys)) + continue; + if (MRI.isReserved(Phys)) + continue; + // Check that Phys is in the allocation order. We shouldn't heed hints + // from VirtReg's register class if they aren't in the allocation order. The + // target probably has a reason for removing the register. + if (!is_contained(Order, Phys)) + continue; + + // All clear, tell the register allocator to prefer this register. 
+ Hints.push_back(Phys); + } + return false; } bool TargetRegisterInfo::canRealignStack(const MachineFunction &MF) const { - return !MF.getFunction()->hasFnAttribute("no-realign-stack"); + return !MF.getFunction().hasFnAttribute("no-realign-stack"); } bool TargetRegisterInfo::needsStackRealignment( const MachineFunction &MF) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); - const Function *F = MF.getFunction(); + const Function &F = MF.getFunction(); unsigned StackAlign = TFI->getStackAlignment(); bool requiresRealignment = ((MFI.getMaxAlignment() > StackAlign) || - F->hasFnAttribute(Attribute::StackAlignment)); - if (MF.getFunction()->hasFnAttribute("stackrealign") || requiresRealignment) { + F.hasFnAttribute(Attribute::StackAlignment)); + if (F.hasFnAttribute("stackrealign") || requiresRealignment) { if (canRealignStack(MF)) return true; - DEBUG(dbgs() << "Can't realign function's stack: " << F->getName() << "\n"); + DEBUG(dbgs() << "Can't realign function's stack: " << F.getName() << "\n"); } return false; } @@ -425,6 +454,6 @@ bool TargetRegisterInfo::regmaskSubsetEqual(const uint32_t *mask0, LLVM_DUMP_METHOD void TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex, const TargetRegisterInfo *TRI) { - dbgs() << PrintReg(Reg, TRI, SubRegIndex) << "\n"; + dbgs() << printReg(Reg, TRI, SubRegIndex) << "\n"; } #endif diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp index 9210ea8a83f6..86dbf1b2aeab 100644 --- a/lib/CodeGen/TargetSchedule.cpp +++ b/lib/CodeGen/TargetSchedule.cpp @@ -16,15 +16,15 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrItineraries.h" #include 
"llvm/MC/MCSchedule.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -316,7 +316,7 @@ computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, // correctly append imp-use operands, and readsReg() strangely returns false // for predicated defs. unsigned Reg = DefMI->getOperand(DefOperIdx).getReg(); - const MachineFunction &MF = *DefMI->getParent()->getParent(); + const MachineFunction &MF = *DefMI->getMF(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(*DepMI)) return computeInstrLatency(DefMI); @@ -339,42 +339,46 @@ computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, static Optional<double> getRThroughputFromItineraries(unsigned schedClass, const InstrItineraryData *IID){ - double Unknown = std::numeric_limits<double>::infinity(); - double Throughput = Unknown; + Optional<double> Throughput; for (const InstrStage *IS = IID->beginStage(schedClass), *E = IID->endStage(schedClass); IS != E; ++IS) { - unsigned Cycles = IS->getCycles(); - if (!Cycles) - continue; - Throughput = - std::min(Throughput, countPopulation(IS->getUnits()) * 1.0 / Cycles); + if (IS->getCycles()) { + double Temp = countPopulation(IS->getUnits()) * 1.0 / IS->getCycles(); + Throughput = Throughput.hasValue() + ? std::min(Throughput.getValue(), Temp) + : Temp; + } } - // We need reciprocal throughput that's why we return such value. - return 1 / Throughput; + if (Throughput.hasValue()) + // We need reciprocal throughput that's why we return such value. 
+ return 1 / Throughput.getValue(); + return Throughput; } static Optional<double> getRThroughputFromInstrSchedModel(const MCSchedClassDesc *SCDesc, const TargetSubtargetInfo *STI, const MCSchedModel &SchedModel) { - double Unknown = std::numeric_limits<double>::infinity(); - double Throughput = Unknown; + Optional<double> Throughput; for (const MCWriteProcResEntry *WPR = STI->getWriteProcResBegin(SCDesc), *WEnd = STI->getWriteProcResEnd(SCDesc); WPR != WEnd; ++WPR) { - unsigned Cycles = WPR->Cycles; - if (!Cycles) - return Optional<double>(); - - unsigned NumUnits = - SchedModel.getProcResource(WPR->ProcResourceIdx)->NumUnits; - Throughput = std::min(Throughput, NumUnits * 1.0 / Cycles); + if (WPR->Cycles) { + unsigned NumUnits = + SchedModel.getProcResource(WPR->ProcResourceIdx)->NumUnits; + double Temp = NumUnits * 1.0 / WPR->Cycles; + Throughput = Throughput.hasValue() + ? std::min(Throughput.getValue(), Temp) + : Temp; + } } - // We need reciprocal throughput that's why we return such value. - return 1 / Throughput; + if (Throughput.hasValue()) + // We need reciprocal throughput that's why we return such value. 
+ return 1 / Throughput.getValue(); + return Throughput; } Optional<double> diff --git a/lib/CodeGen/TargetSubtargetInfo.cpp b/lib/CodeGen/TargetSubtargetInfo.cpp index f6d5bc80ddff..1a317cd865f0 100644 --- a/lib/CodeGen/TargetSubtargetInfo.cpp +++ b/lib/CodeGen/TargetSubtargetInfo.cpp @@ -11,11 +11,12 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/ADT/Optional.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/MC/MCInst.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include <string> @@ -50,6 +51,10 @@ bool TargetSubtargetInfo::enableRALocalReassignment( return true; } +bool TargetSubtargetInfo::enableAdvancedRASplitCost() const { + return false; +} + bool TargetSubtargetInfo::enablePostRAScheduler() const { return getSchedModel().PostRAScheduler; } @@ -93,9 +98,15 @@ std::string TargetSubtargetInfo::getSchedInfoStr(MCInst const &MCI) const { // that could be changed during the compilation TargetSchedModel TSchedModel; TSchedModel.init(getSchedModel(), this, getInstrInfo()); - if (!TSchedModel.hasInstrSchedModel()) + unsigned Latency; + if (TSchedModel.hasInstrSchedModel()) + Latency = TSchedModel.computeInstrLatency(MCI.getOpcode()); + else if (TSchedModel.hasInstrItineraries()) { + auto *ItinData = TSchedModel.getInstrItineraries(); + Latency = ItinData->getStageLatency( + getInstrInfo()->get(MCI.getOpcode()).getSchedClass()); + } else return std::string(); - unsigned Latency = TSchedModel.computeInstrLatency(MCI.getOpcode()); Optional<double> RThroughput = TSchedModel.computeInstrRThroughput(MCI.getOpcode()); return createSchedInfoStr(Latency, RThroughput); diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 83c00e24d14f..774b76f84b7f 100644 --- 
a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1,4 +1,4 @@ -//===-- TwoAddressInstructionPass.cpp - Two-Address instruction pass ------===// +//===- TwoAddressInstructionPass.cpp - Two-Address instruction pass -------===// // // The LLVM Compiler Infrastructure // @@ -28,27 +28,40 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/IR/Function.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Pass.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include <cassert> +#include <iterator> +#include <utility> using namespace llvm; @@ -76,6 +89,7 
@@ static cl::opt<unsigned> MaxDataFlowEdge( "the benefit of commuting operands")); namespace { + class TwoAddressInstructionPass : public MachineFunctionPass { MachineFunction *MF; const TargetInstrInfo *TII; @@ -96,6 +110,10 @@ class TwoAddressInstructionPass : public MachineFunctionPass { // Set of already processed instructions in the current block. SmallPtrSet<MachineInstr*, 8> Processed; + // Set of instructions converted to three-address by target and then sunk + // down current basic block. + SmallPtrSet<MachineInstr*, 8> SunkInstrs; + // A map from virtual registers to physical registers which are likely targets // to be coalesced to due to copies from physical registers to virtual // registers. e.g. v1024 = move r0. @@ -148,14 +166,16 @@ class TwoAddressInstructionPass : public MachineFunctionPass { void processCopy(MachineInstr *MI); - typedef SmallVector<std::pair<unsigned, unsigned>, 4> TiedPairList; - typedef SmallDenseMap<unsigned, TiedPairList> TiedOperandMap; + using TiedPairList = SmallVector<std::pair<unsigned, unsigned>, 4>; + using TiedOperandMap = SmallDenseMap<unsigned, TiedPairList>; + bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&); void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist); void eliminateRegSequence(MachineBasicBlock::iterator&); public: static char ID; // Pass identification, replacement for typeid + TwoAddressInstructionPass() : MachineFunctionPass(ID) { initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry()); } @@ -175,17 +195,19 @@ public: /// Pass entry point. 
bool runOnMachineFunction(MachineFunction&) override; }; + } // end anonymous namespace char TwoAddressInstructionPass::ID = 0; + +char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID; + INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, DEBUG_TYPE, "Two-Address instruction pass", false, false) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(TwoAddressInstructionPass, DEBUG_TYPE, "Two-Address instruction pass", false, false) -char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID; - static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, LiveIntervals *LIS); /// A two-address instruction has been converted to a three-address instruction @@ -267,7 +289,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, ++KillPos; unsigned NumVisited = 0; - for (MachineInstr &OtherMI : llvm::make_range(std::next(OldPos), KillPos)) { + for (MachineInstr &OtherMI : make_range(std::next(OldPos), KillPos)) { // DBG_VALUE cannot be counted against the limit. if (OtherMI.isDebugValue()) continue; @@ -436,8 +458,8 @@ static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, /// For example, in this code: /// /// %reg1034 = copy %reg1024 -/// %reg1035 = copy %reg1025<kill> -/// %reg1036 = add %reg1034<kill>, %reg1035<kill> +/// %reg1035 = copy killed %reg1025 +/// %reg1036 = add killed %reg1034, killed %reg1035 /// /// %reg1034 is not considered to be killed, since it is copied from a /// register which is not killed. Treating it as not killed lets the @@ -452,7 +474,7 @@ static bool isKilled(MachineInstr &MI, unsigned Reg, LiveIntervals *LIS, bool allowFalsePositives) { MachineInstr *DefMI = &MI; - for (;;) { + while (true) { // All uses of physical registers are likely to be kills. 
if (TargetRegisterInfo::isPhysicalRegister(Reg) && (allowFalsePositives || MRI->hasOneUse(Reg))) @@ -569,31 +591,31 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, // general, we want no uses between this instruction and the definition of // the two-address register. // e.g. - // %reg1028<def> = EXTRACT_SUBREG %reg1027<kill>, 1 - // %reg1029<def> = MOV8rr %reg1028 - // %reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead> - // insert => %reg1030<def> = MOV8rr %reg1028 - // %reg1030<def> = ADD8rr %reg1028<kill>, %reg1029<kill>, %EFLAGS<imp-def,dead> + // %reg1028 = EXTRACT_SUBREG killed %reg1027, 1 + // %reg1029 = MOV8rr %reg1028 + // %reg1029 = SHR8ri %reg1029, 7, implicit dead %eflags + // insert => %reg1030 = MOV8rr %reg1028 + // %reg1030 = ADD8rr killed %reg1028, killed %reg1029, implicit dead %eflags // In this case, it might not be possible to coalesce the second MOV8rr // instruction if the first one is coalesced. So it would be profitable to // commute it: - // %reg1028<def> = EXTRACT_SUBREG %reg1027<kill>, 1 - // %reg1029<def> = MOV8rr %reg1028 - // %reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead> - // insert => %reg1030<def> = MOV8rr %reg1029 - // %reg1030<def> = ADD8rr %reg1029<kill>, %reg1028<kill>, %EFLAGS<imp-def,dead> + // %reg1028 = EXTRACT_SUBREG killed %reg1027, 1 + // %reg1029 = MOV8rr %reg1028 + // %reg1029 = SHR8ri %reg1029, 7, implicit dead %eflags + // insert => %reg1030 = MOV8rr %reg1029 + // %reg1030 = ADD8rr killed %reg1029, killed %reg1028, implicit dead %eflags if (!isPlainlyKilled(MI, regC, LIS)) return false; // Ok, we have something like: - // %reg1030<def> = ADD8rr %reg1028<kill>, %reg1029<kill>, %EFLAGS<imp-def,dead> + // %reg1030 = ADD8rr killed %reg1028, killed %reg1029, implicit dead %eflags // let's see if it's worth commuting it. 
// Look for situations like this: - // %reg1024<def> = MOV r1 - // %reg1025<def> = MOV r0 - // %reg1026<def> = ADD %reg1024, %reg1025 + // %reg1024 = MOV r1 + // %reg1025 = MOV r0 + // %reg1026 = ADD %reg1024, %reg1025 // r0 = MOV %reg1026 // Commute the ADD to hopefully eliminate an otherwise unavoidable copy. unsigned ToRegA = getMappedReg(regA, DstRegMap); @@ -691,9 +713,9 @@ bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI, bool TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA,unsigned RegB){ // Look for situations like this: - // %reg1024<def> = MOV r1 - // %reg1025<def> = MOV r0 - // %reg1026<def> = ADD %reg1024, %reg1025 + // %reg1024 = MOV r1 + // %reg1025 = MOV r0 + // %reg1026 = ADD %reg1024, %reg1025 // r2 = MOV %reg1026 // Turn ADD into a 3-address instruction to avoid a copy. unsigned FromRegB = getMappedReg(RegB, SrcRegMap); @@ -738,6 +760,8 @@ TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi, mi = NewMI; nmi = std::next(mi); } + else + SunkInstrs.insert(NewMI); // Update source and destination register maps. SrcRegMap.erase(RegA); @@ -904,7 +928,6 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, // Move the copies connected to MI down as well. MachineBasicBlock::iterator Begin = MI; MachineBasicBlock::iterator AfterMI = std::next(Begin); - MachineBasicBlock::iterator End = AfterMI; while (End->isCopy() && regOverlapsSet(Defs, End->getOperand(1).getReg(), TRI)) { @@ -916,7 +939,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, unsigned NumVisited = 0; MachineBasicBlock::iterator KillPos = KillMI; ++KillPos; - for (MachineInstr &OtherMI : llvm::make_range(End, KillPos)) { + for (MachineInstr &OtherMI : make_range(End, KillPos)) { // DBG_VALUE cannot be counted against the limit. if (OtherMI.isDebugValue()) continue; @@ -1090,7 +1113,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, // Check if the reschedule will not break depedencies. 
unsigned NumVisited = 0; for (MachineInstr &OtherMI : - llvm::make_range(mi, MachineBasicBlock::iterator(KillMI))) { + make_range(mi, MachineBasicBlock::iterator(KillMI))) { // DBG_VALUE cannot be counted against the limit. if (OtherMI.isDebugValue()) continue; @@ -1443,7 +1466,7 @@ collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) { assert(SrcReg && SrcMO.isUse() && "two address instruction invalid"); - // Deal with <undef> uses immediately - simply rewrite the src operand. + // Deal with undef uses immediately - simply rewrite the src operand. if (SrcMO.isUndef() && !DstMO.getSubReg()) { // Constrain the DstReg register class if required. if (TargetRegisterInfo::isVirtualRegister(DstReg)) @@ -1609,7 +1632,6 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, if (I->end == UseIdx) LI.removeSegment(LastCopyIdx, UseIdx); } - } else if (RemovedKillFlag) { // Some tied uses of regB matched their destination registers, so // regB is still used in this instruction, but a kill flag was @@ -1639,6 +1661,10 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { else AA = nullptr; OptLevel = TM.getOptLevel(); + // Disable optimizations if requested. We cannot skip the whole pass as some + // fixups are necessary for correctness. + if (skipFunction(Func.getFunction())) + OptLevel = CodeGenOpt::None; bool MadeChange = false; @@ -1658,10 +1684,13 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { SrcRegMap.clear(); DstRegMap.clear(); Processed.clear(); + SunkInstrs.clear(); for (MachineBasicBlock::iterator mi = MBB->begin(), me = MBB->end(); mi != me; ) { MachineBasicBlock::iterator nmi = std::next(mi); - if (mi->isDebugValue()) { + // Don't revisit an instruction previously converted by target. It may + // contain undef register operands (%noreg), which are not handled. 
+ if (mi->isDebugValue() || SunkInstrs.count(&*mi)) { mi = nmi; continue; } @@ -1690,7 +1719,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { // transformations that may either eliminate the tied operands or // improve the opportunities for coalescing away the register copy. if (TiedOperands.size() == 1) { - SmallVectorImpl<std::pair<unsigned, unsigned> > &TiedPairs + SmallVectorImpl<std::pair<unsigned, unsigned>> &TiedPairs = TiedOperands.begin()->second; if (TiedPairs.size() == 1) { unsigned SrcIdx = TiedPairs[0].first; @@ -1749,9 +1778,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { /// /// Becomes: /// -/// %dst:ssub0<def,undef> = COPY %v1 -/// %dst:ssub1<def> = COPY %v2 -/// +/// undef %dst:ssub0 = COPY %v1 +/// %dst:ssub1 = COPY %v2 void TwoAddressInstructionPass:: eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { MachineInstr &MI = *MBBI; @@ -1775,7 +1803,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { MachineOperand &UseMO = MI.getOperand(i); unsigned SrcReg = UseMO.getReg(); unsigned SubIdx = MI.getOperand(i+1).getImm(); - // Nothing needs to be inserted for <undef> operands. + // Nothing needs to be inserted for undef operands. if (UseMO.isUndef()) continue; @@ -1797,7 +1825,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { .addReg(DstReg, RegState::Define, SubIdx) .add(UseMO); - // The first def needs an <undef> flag because there is no live register + // The first def needs an undef flag because there is no live register // before it. 
if (!DefEmitted) { CopyMI->getOperand(0).setIsUndef(true); diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp index 407fd9b162e9..5288ca672774 100644 --- a/lib/CodeGen/UnreachableBlockElim.cpp +++ b/lib/CodeGen/UnreachableBlockElim.cpp @@ -30,6 +30,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Dominators.h" @@ -37,7 +38,6 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" -#include "llvm/Target/TargetInstrInfo.h" using namespace llvm; static bool eliminateUnreachableBlock(Function &F) { @@ -207,11 +207,12 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { MachineRegisterInfo &MRI = F.getRegInfo(); unsigned InputSub = Input.getSubReg(); if (InputSub == 0 && - MRI.constrainRegClass(InputReg, MRI.getRegClass(OutputReg))) { + MRI.constrainRegClass(InputReg, MRI.getRegClass(OutputReg)) && + !Input.isUndef()) { MRI.replaceRegWith(OutputReg, InputReg); } else { // The input register to the PHI has a subregister or it can't be - // constrained to the proper register class: + // constrained to the proper register class or it is undef: // insert a COPY instead of simply replacing the output // with the input. 
const TargetInstrInfo *TII = F.getSubtarget().getInstrInfo(); diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index f8aacdb8649d..64bb37a280a6 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/VirtRegMap.cpp - Virtual Register Map ----------------===// +//===- llvm/CodeGen/VirtRegMap.cpp - Virtual Register Map -----------------===// // // The LLVM Compiler Infrastructure // @@ -18,24 +18,32 @@ #include "llvm/CodeGen/VirtRegMap.h" #include "LiveDebugVariables.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/IR/Function.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/MC/LaneBitmask.h" +#include "llvm/Pass.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" -#include <algorithm> +#include <cassert> +#include <iterator> +#include <utility> + using namespace llvm; #define DEBUG_TYPE "regalloc" @@ -132,8 +140,8 @@ void VirtRegMap::print(raw_ostream &OS, const 
Module*) const { for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) { - OS << '[' << PrintReg(Reg, TRI) << " -> " - << PrintReg(Virt2PhysMap[Reg], TRI) << "] " + OS << '[' << printReg(Reg, TRI) << " -> " + << printReg(Virt2PhysMap[Reg], TRI) << "] " << TRI->getRegClassName(MRI->getRegClass(Reg)) << "\n"; } } @@ -141,7 +149,7 @@ void VirtRegMap::print(raw_ostream &OS, const Module*) const { for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); if (Virt2StackSlotMap[Reg] != VirtRegMap::NO_STACK_SLOT) { - OS << '[' << PrintReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg] + OS << '[' << printReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg] << "] " << TRI->getRegClassName(MRI->getRegClass(Reg)) << "\n"; } } @@ -164,9 +172,9 @@ LLVM_DUMP_METHOD void VirtRegMap::dump() const { // according to LiveIntervals. 
// namespace { + class VirtRegRewriter : public MachineFunctionPass { MachineFunction *MF; - const TargetMachine *TM; const TargetRegisterInfo *TRI; const TargetInstrInfo *TII; MachineRegisterInfo *MRI; @@ -184,18 +192,23 @@ class VirtRegRewriter : public MachineFunctionPass { public: static char ID; + VirtRegRewriter() : MachineFunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const override; bool runOnMachineFunction(MachineFunction&) override; + MachineFunctionProperties getSetProperties() const override { return MachineFunctionProperties().set( MachineFunctionProperties::Property::NoVRegs); } }; + } // end anonymous namespace +char VirtRegRewriter::ID = 0; + char &llvm::VirtRegRewriterID = VirtRegRewriter::ID; INITIALIZE_PASS_BEGIN(VirtRegRewriter, "virtregrewriter", @@ -208,8 +221,6 @@ INITIALIZE_PASS_DEPENDENCY(VirtRegMap) INITIALIZE_PASS_END(VirtRegRewriter, "virtregrewriter", "Virtual Register Rewriter", false, false) -char VirtRegRewriter::ID = 0; - void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired<LiveIntervals>(); @@ -224,7 +235,6 @@ void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const { bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) { MF = &fn; - TM = &MF->getTarget(); TRI = MF->getSubtarget().getRegisterInfo(); TII = MF->getSubtarget().getInstrInfo(); MRI = &MF->getRegInfo(); @@ -260,8 +270,9 @@ void VirtRegRewriter::addLiveInsForSubRanges(const LiveInterval &LI, assert(!LI.empty()); assert(LI.hasSubRanges()); - typedef std::pair<const LiveInterval::SubRange *, - LiveInterval::const_iterator> SubRangeIteratorPair; + using SubRangeIteratorPair = + std::pair<const LiveInterval::SubRange *, LiveInterval::const_iterator>; + SmallVector<SubRangeIteratorPair, 4> SubRanges; SlotIndex First; SlotIndex Last; @@ -369,8 +380,8 @@ void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) const { ++NumIdCopies; // Copies like: - // %R0 = COPY %R0<undef> - // %AL = COPY %AL, 
%EAX<imp-def> + // %r0 = COPY undef %r0 + // %al = COPY %al, implicit-def %eax // give us additional liveness information: The target (super-)register // must not be valid before this point. Replace the COPY with a KILL // instruction to maintain this information. @@ -477,7 +488,7 @@ void VirtRegRewriter::rewrite() { if (SubReg != 0) { if (NoSubRegLiveness) { // A virtual register kill refers to the whole register, so we may - // have to add <imp-use,kill> operands for the super-register. A + // have to add implicit killed operands for the super-register. A // partial redef always kills and redefines the super-register. if ((MO.readsReg() && (MO.isDef() || MO.isKill())) || (MO.isDef() && subRegLiveThrough(*MI, PhysReg))) @@ -502,9 +513,9 @@ void VirtRegRewriter::rewrite() { } } - // The <def,undef> and <def,internal> flags only make sense for + // The def undef and def internal flags only make sense for // sub-register defs, and we are substituting a full physreg. An - // <imp-use,kill> operand from the SuperKills list will represent the + // implicit killed operand from the SuperKills list will represent the // partial read of the super-register. if (MO.isDef()) { MO.setIsUndef(false); @@ -519,6 +530,7 @@ void VirtRegRewriter::rewrite() { // Rewrite. Note we could have used MachineOperand::substPhysReg(), but // we need the inlining here. 
MO.setReg(PhysReg); + MO.setIsRenamableIfNoExtraRegAllocReq(); } // Add any missing super-register kills after rewriting the whole diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp index c63a0a9e60ea..7ad84734203d 100644 --- a/lib/CodeGen/WinEHPrepare.cpp +++ b/lib/CodeGen/WinEHPrepare.cpp @@ -1014,6 +1014,7 @@ void WinEHPrepare::cleanupPreparedFunclets(Function &F) { removeUnreachableBlocks(F); } +#ifndef NDEBUG void WinEHPrepare::verifyPreparedFunclets(Function &F) { for (BasicBlock &BB : F) { size_t NumColors = BlockColors[&BB].size(); @@ -1026,6 +1027,7 @@ void WinEHPrepare::verifyPreparedFunclets(Function &F) { "EH Pad still has a PHI!"); } } +#endif bool WinEHPrepare::prepareExplicitEH(Function &F) { // Remove unreachable blocks. It is not valuable to assign them a color and diff --git a/lib/CodeGen/XRayInstrumentation.cpp b/lib/CodeGen/XRayInstrumentation.cpp index 0b4c6e551667..3d83afcf1fc5 100644 --- a/lib/CodeGen/XRayInstrumentation.cpp +++ b/lib/CodeGen/XRayInstrumentation.cpp @@ -14,8 +14,8 @@ // //===---------------------------------------------------------------------===// -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Triple.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" @@ -23,17 +23,26 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" #include "llvm/Pass.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; namespace { +struct InstrumentationOptions { + // Whether to emit PATCHABLE_TAIL_CALL. 
+ bool HandleTailcall; + + // Whether to emit PATCHABLE_RET/PATCHABLE_FUNCTION_EXIT for all forms of + // return, e.g. conditional return. + bool HandleAllReturns; +}; + struct XRayInstrumentation : public MachineFunctionPass { static char ID; @@ -59,7 +68,8 @@ private: // This is the approach to go on CPUs which have a single RET instruction, // like x86/x86_64. void replaceRetWithPatchableRet(MachineFunction &MF, - const TargetInstrInfo *TII); + const TargetInstrInfo *TII, + InstrumentationOptions); // Prepend the original return instruction with the exit sled code ("patchable // function exit" pseudo-instruction), preserving the original return @@ -70,25 +80,28 @@ private: // have to call the trampoline and return from it to the original return // instruction of the function being instrumented. void prependRetWithPatchableExit(MachineFunction &MF, - const TargetInstrInfo *TII); + const TargetInstrInfo *TII, + InstrumentationOptions); }; } // end anonymous namespace void XRayInstrumentation::replaceRetWithPatchableRet( - MachineFunction &MF, const TargetInstrInfo *TII) { + MachineFunction &MF, const TargetInstrInfo *TII, + InstrumentationOptions op) { // We look for *all* terminators and returns, then replace those with // PATCHABLE_RET instructions. SmallVector<MachineInstr *, 4> Terminators; for (auto &MBB : MF) { for (auto &T : MBB.terminators()) { unsigned Opc = 0; - if (T.isReturn() && T.getOpcode() == TII->getReturnOpcode()) { + if (T.isReturn() && + (op.HandleAllReturns || T.getOpcode() == TII->getReturnOpcode())) { // Replace return instructions with: // PATCHABLE_RET <Opcode>, <Operand>... Opc = TargetOpcode::PATCHABLE_RET; } - if (TII->isTailCall(T)) { + if (TII->isTailCall(T) && op.HandleTailcall) { // Treat the tail call as a return instruction, which has a // different-looking sled than the normal return case. 
Opc = TargetOpcode::PATCHABLE_TAIL_CALL; @@ -108,14 +121,16 @@ void XRayInstrumentation::replaceRetWithPatchableRet( } void XRayInstrumentation::prependRetWithPatchableExit( - MachineFunction &MF, const TargetInstrInfo *TII) { - for (auto &MBB : MF) { + MachineFunction &MF, const TargetInstrInfo *TII, + InstrumentationOptions op) { + for (auto &MBB : MF) for (auto &T : MBB.terminators()) { unsigned Opc = 0; - if (T.isReturn()) { + if (T.isReturn() && + (op.HandleAllReturns || T.getOpcode() == TII->getReturnOpcode())) { Opc = TargetOpcode::PATCHABLE_FUNCTION_EXIT; } - if (TII->isTailCall(T)) { + if (TII->isTailCall(T) && op.HandleTailcall) { Opc = TargetOpcode::PATCHABLE_TAIL_CALL; } if (Opc != 0) { @@ -124,11 +139,10 @@ void XRayInstrumentation::prependRetWithPatchableExit( BuildMI(MBB, T, T.getDebugLoc(), TII->get(Opc)); } } - } } bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { - auto &F = *MF.getFunction(); + auto &F = MF.getFunction(); auto InstrAttr = F.getFnAttribute("function-instrument"); bool AlwaysInstrument = !InstrAttr.hasAttribute(Attribute::None) && InstrAttr.isStringAttribute() && @@ -143,7 +157,7 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { // Count the number of MachineInstr`s in MachineFunction int64_t MICount = 0; - for (const auto& MBB : MF) + for (const auto &MBB : MF) MICount += MBB.size(); // Check if we have a loop. 
@@ -180,20 +194,35 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { case Triple::ArchType::arm: case Triple::ArchType::thumb: case Triple::ArchType::aarch64: - case Triple::ArchType::ppc64le: case Triple::ArchType::mips: case Triple::ArchType::mipsel: case Triple::ArchType::mips64: - case Triple::ArchType::mips64el: + case Triple::ArchType::mips64el: { // For the architectures which don't have a single return instruction - prependRetWithPatchableExit(MF, TII); + InstrumentationOptions op; + op.HandleTailcall = false; + op.HandleAllReturns = true; + prependRetWithPatchableExit(MF, TII, op); + break; + } + case Triple::ArchType::ppc64le: { + // PPC has conditional returns. Turn them into branch and plain returns. + InstrumentationOptions op; + op.HandleTailcall = false; + op.HandleAllReturns = true; + replaceRetWithPatchableRet(MF, TII, op); break; - default: + } + default: { // For the architectures that have a single return instruction (such as // RETQ on x86_64). - replaceRetWithPatchableRet(MF, TII); + InstrumentationOptions op; + op.HandleTailcall = true; + op.HandleAllReturns = false; + replaceRetWithPatchableRet(MF, TII, op); break; } + } return true; } |